Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig6
-rw-r--r--arch/alpha/kernel/rtc.c2
-rw-r--r--arch/alpha/kernel/srmcons.c34
-rw-r--r--arch/arc/include/asm/cacheflush.h1
-rw-r--r--arch/arc/mm/dma.c2
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts4
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts4
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts2
-rw-r--r--arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi4
-rw-r--r--arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi5
-rw-r--r--arch/arm/boot/dts/broadcom/bcm2711.dtsi14
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts2
-rw-r--r--arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi1
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi2
-rw-r--r--arch/arm/common/locomo.c4
-rw-r--r--arch/arm/configs/mxs_defconfig3
-rw-r--r--arch/arm/include/asm/cacheflush.h2
-rw-r--r--arch/arm/include/asm/hardware/locomo.h2
-rw-r--r--arch/arm/include/asm/pgtable.h2
-rw-r--r--arch/arm/mach-davinci/Kconfig1
-rw-r--r--arch/arm/mm/dma-mapping-nommu.c2
-rw-r--r--arch/arm/mm/dma-mapping.c15
-rw-r--r--arch/arm/mm/fault.c30
-rw-r--r--arch/arm/mm/kasan_init.c8
-rw-r--r--arch/arm/vfp/vfpmodule.c18
-rw-r--r--arch/arm64/Kconfig19
-rw-r--r--arch/arm64/boot/dts/exynos/google/gs101.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-evk.dts22
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916.dtsi9
-rw-r--r--arch/arm64/boot/dts/qcom/msm8939.dtsi12
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi43
-rw-r--r--arch/arm64/boot/dts/qcom/msm8998.dtsi5
-rw-r--r--arch/arm64/boot/dts/qcom/qcs404.dtsi9
-rw-r--r--arch/arm64/boot/dts/qcom/qrb5165-rb5.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sdm630.dtsi53
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi2
-rw-r--r--arch/arm64/include/asm/assembler.h25
-rw-r--r--arch/arm64/include/asm/cpu.h1
-rw-r--r--arch/arm64/include/asm/esr.h15
-rw-r--r--arch/arm64/include/asm/irq.h2
-rw-r--r--arch/arm64/include/asm/kvm_arm.h63
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h34
-rw-r--r--arch/arm64/include/asm/kvm_host.h140
-rw-r--r--arch/arm64/include/asm/kvm_nested.h56
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h80
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h5
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h103
-rw-r--r--arch/arm64/kernel/Makefile8
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/cpu_errata.c21
-rw-r--r--arch/arm64/kernel/cpufeature.c2
-rw-r--r--arch/arm64/kernel/entry.S25
-rw-r--r--arch/arm64/kernel/fpsimd.c12
-rw-r--r--arch/arm64/kernel/ptrace.c13
-rw-r--r--arch/arm64/kernel/setup.c13
-rw-r--r--arch/arm64/kvm/Kconfig7
-rw-r--r--arch/arm64/kvm/arch_timer.c3
-rw-r--r--arch/arm64/kvm/arm.c12
-rw-r--r--arch/arm64/kvm/emulate-nested.c63
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h93
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h22
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c90
-rw-r--r--arch/arm64/kvm/mmu.c49
-rw-r--r--arch/arm64/kvm/nested.c22
-rw-r--r--arch/arm64/kvm/reset.c9
-rw-r--r--arch/arm64/kvm/sys_regs.c235
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c5
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c28
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c101
-rw-r--r--arch/arm64/mm/dma-mapping.c4
-rw-r--r--arch/arm64/tools/cpucaps2
-rw-r--r--arch/csky/abiv1/inc/abi/cacheflush.h1
-rw-r--r--arch/csky/abiv2/inc/abi/cacheflush.h1
-rw-r--r--arch/csky/configs/defconfig3
-rw-r--r--arch/loongarch/Kbuild1
-rw-r--r--arch/loongarch/Kconfig24
-rw-r--r--arch/loongarch/Makefile6
-rw-r--r--arch/loongarch/boot/dts/Makefile5
-rw-r--r--arch/loongarch/boot/dts/loongson-2k0500-ref.dts88
-rw-r--r--arch/loongarch/boot/dts/loongson-2k0500.dtsi266
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000-ref.dts183
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000.dtsi492
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000-ref.dts72
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000.dtsi300
-rw-r--r--arch/loongarch/configs/loongson3_defconfig55
-rw-r--r--arch/loongarch/include/asm/bootinfo.h6
-rw-r--r--arch/loongarch/include/asm/crash_core.h12
-rw-r--r--arch/loongarch/include/asm/elf.h5
-rw-r--r--arch/loongarch/include/asm/ftrace.h2
-rw-r--r--arch/loongarch/include/asm/kvm_host.h25
-rw-r--r--arch/loongarch/include/asm/kvm_vcpu.h21
-rw-r--r--arch/loongarch/include/asm/shmparam.h12
-rw-r--r--arch/loongarch/include/uapi/asm/kvm.h1
-rw-r--r--arch/loongarch/kernel/acpi.c2
-rw-r--r--arch/loongarch/kernel/efi.c2
-rw-r--r--arch/loongarch/kernel/elf.c5
-rw-r--r--arch/loongarch/kernel/env.c34
-rw-r--r--arch/loongarch/kernel/fpu.S2
-rw-r--r--arch/loongarch/kernel/head.S10
-rw-r--r--arch/loongarch/kernel/process.c1
-rw-r--r--arch/loongarch/kernel/setup.c56
-rw-r--r--arch/loongarch/kernel/smp.c6
-rw-r--r--arch/loongarch/kernel/topology.c42
-rw-r--r--arch/loongarch/kvm/Kconfig5
-rw-r--r--arch/loongarch/kvm/exit.c50
-rw-r--r--arch/loongarch/kvm/main.c1
-rw-r--r--arch/loongarch/kvm/mmu.c128
-rw-r--r--arch/loongarch/kvm/switch.S31
-rw-r--r--arch/loongarch/kvm/timer.c129
-rw-r--r--arch/loongarch/kvm/trace.h6
-rw-r--r--arch/loongarch/kvm/vcpu.c307
-rw-r--r--arch/loongarch/mm/tlb.c16
-rw-r--r--arch/loongarch/net/bpf_jit.c10
-rw-r--r--arch/m68k/emu/nfcon.c4
-rw-r--r--arch/m68k/include/asm/cacheflush_mm.h1
-rw-r--r--arch/microblaze/configs/mmu_defconfig13
-rw-r--r--arch/mips/alchemy/common/prom.c1
-rw-r--r--arch/mips/alchemy/common/setup.c4
-rw-r--r--arch/mips/alchemy/devboards/db1200.c2
-rw-r--r--arch/mips/alchemy/devboards/db1550.c2
-rw-r--r--arch/mips/bcm47xx/buttons.c6
-rw-r--r--arch/mips/bcm63xx/boards/board_bcm963xx.c2
-rw-r--r--arch/mips/bcm63xx/clk.c4
-rw-r--r--arch/mips/bcm63xx/dev-rng.c2
-rw-r--r--arch/mips/bcm63xx/dev-uart.c1
-rw-r--r--arch/mips/bcm63xx/dev-wdt.c2
-rw-r--r--arch/mips/bcm63xx/irq.c2
-rw-r--r--arch/mips/bcm63xx/setup.c2
-rw-r--r--arch/mips/bcm63xx/timer.c2
-rw-r--r--arch/mips/boot/compressed/dbg.c2
-rw-r--r--arch/mips/boot/compressed/head.S4
-rw-r--r--arch/mips/boot/elf2ecoff.c2
-rw-r--r--arch/mips/cavium-octeon/csrc-octeon.c2
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-boot-vector.c2
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-bootmem.c2
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c4
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c2
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-pko.c2
-rw-r--r--arch/mips/cavium-octeon/octeon-platform.c2
-rw-r--r--arch/mips/cobalt/setup.c3
-rw-r--r--arch/mips/configs/ip27_defconfig3
-rw-r--r--arch/mips/configs/lemote2f_defconfig3
-rw-r--r--arch/mips/configs/loongson3_defconfig3
-rw-r--r--arch/mips/configs/pic32mzda_defconfig3
-rw-r--r--arch/mips/fw/arc/memory.c2
-rw-r--r--arch/mips/fw/arc/promlib.c6
-rw-r--r--arch/mips/include/asm/cacheflush.h2
-rw-r--r--arch/mips/include/asm/debug.h2
-rw-r--r--arch/mips/include/asm/dmi.h2
-rw-r--r--arch/mips/include/asm/io.h4
-rw-r--r--arch/mips/include/asm/kvm_host.h2
-rw-r--r--arch/mips/include/asm/mach-au1x00/au1000.h3
-rw-r--r--arch/mips/include/asm/mach-au1x00/au1000_dma.h2
-rw-r--r--arch/mips/include/asm/mach-au1x00/gpio-au1000.h2
-rw-r--r--arch/mips/include/asm/mach-cobalt/cobalt.h3
-rw-r--r--arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h2
-rw-r--r--arch/mips/include/asm/mach-loongson64/loongson_hwmon.h2
-rw-r--r--arch/mips/include/asm/mach-loongson64/loongson_regs.h2
-rw-r--r--arch/mips/include/asm/mach-malta/spaces.h4
-rw-r--r--arch/mips/include/asm/mips-boards/bonito64.h2
-rw-r--r--arch/mips/include/asm/mips-cpc.h2
-rw-r--r--arch/mips/include/asm/mipsregs.h4
-rw-r--r--arch/mips/include/asm/octeon/cvmx-bootinfo.h2
-rw-r--r--arch/mips/include/asm/octeon/cvmx-cmd-queue.h6
-rw-r--r--arch/mips/include/asm/octeon/cvmx-pko.h2
-rw-r--r--arch/mips/include/asm/octeon/cvmx-pow.h4
-rw-r--r--arch/mips/include/asm/octeon/octeon-model.h4
-rw-r--r--arch/mips/include/asm/page.h2
-rw-r--r--arch/mips/include/asm/pci.h2
-rw-r--r--arch/mips/include/asm/pgtable-bits.h2
-rw-r--r--arch/mips/include/asm/sgi/mc.h2
-rw-r--r--arch/mips/include/asm/sn/klconfig.h2
-rw-r--r--arch/mips/include/asm/sync.h2
-rw-r--r--arch/mips/include/asm/thread_info.h2
-rw-r--r--arch/mips/include/asm/timex.h2
-rw-r--r--arch/mips/include/asm/vdso/vdso.h2
-rw-r--r--arch/mips/include/uapi/asm/mman.h2
-rw-r--r--arch/mips/include/uapi/asm/msgbuf.h2
-rw-r--r--arch/mips/kernel/cpu-probe.c2
-rw-r--r--arch/mips/kernel/elf.c6
-rw-r--r--arch/mips/kernel/genex.S8
-rw-r--r--arch/mips/kernel/kprobes.c2
-rw-r--r--arch/mips/kernel/prom.c2
-rw-r--r--arch/mips/kernel/relocate.c2
-rw-r--r--arch/mips/kernel/relocate_kernel.S2
-rw-r--r--arch/mips/kernel/setup.c6
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/traps.c101
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mips/kvm/Kconfig6
-rw-r--r--arch/mips/kvm/emulate.c2
-rw-r--r--arch/mips/lantiq/prom.c7
-rw-r--r--arch/mips/loongson2ef/common/platform.c2
-rw-r--r--arch/mips/loongson64/init.c3
-rw-r--r--arch/mips/loongson64/numa.c2
-rw-r--r--arch/mips/loongson64/smp.c2
-rw-r--r--arch/mips/mm/c-r4k.c2
-rw-r--r--arch/mips/mm/cex-gen.S2
-rw-r--r--arch/mips/mm/dma-noncoherent.c2
-rw-r--r--arch/mips/mm/init.c16
-rw-r--r--arch/mips/mm/ioremap.c4
-rw-r--r--arch/mips/mm/tlb-r3k.c2
-rw-r--r--arch/mips/mm/tlb-r4k.c2
-rw-r--r--arch/mips/mm/tlbex.c4
-rw-r--r--arch/mips/net/bpf_jit_comp32.c2
-rw-r--r--arch/mips/pci/ops-loongson2.c2
-rw-r--r--arch/mips/pci/pci-alchemy.c2
-rw-r--r--arch/mips/pci/pci-ar2315.c2
-rw-r--r--arch/mips/pci/pci-lantiq.c2
-rw-r--r--arch/mips/pci/pci-octeon.c2
-rw-r--r--arch/mips/pci/pci-xtalk-bridge.c2
-rw-r--r--arch/mips/pci/pcie-octeon.c2
-rw-r--r--arch/mips/ralink/mt7621.c2
-rw-r--r--arch/mips/sgi-ip27/Makefile2
-rw-r--r--arch/mips/sgi-ip27/ip27-berr.c4
-rw-r--r--arch/mips/sgi-ip27/ip27-common.h2
-rw-r--r--arch/mips/sgi-ip27/ip27-hubio.c185
-rw-r--r--arch/mips/sgi-ip27/ip27-irq.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c1
-rw-r--r--arch/mips/sgi-ip27/ip27-nmi.c25
-rw-r--r--arch/mips/sgi-ip30/ip30-console.c1
-rw-r--r--arch/mips/sgi-ip30/ip30-setup.c1
-rw-r--r--arch/mips/sgi-ip32/crime.c6
-rw-r--r--arch/mips/sgi-ip32/ip32-berr.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-common.h15
-rw-r--r--arch/mips/sgi-ip32/ip32-irq.c6
-rw-r--r--arch/mips/sgi-ip32/ip32-memory.c1
-rw-r--r--arch/mips/sgi-ip32/ip32-reset.c2
-rw-r--r--arch/mips/sgi-ip32/ip32-setup.c3
-rw-r--r--arch/mips/txx9/generic/pci.c2
-rw-r--r--arch/nios2/include/asm/cacheflush.h1
-rw-r--r--arch/parisc/include/asm/cacheflush.h1
-rw-r--r--arch/parisc/kernel/firmware.c4
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/hvconsole.h4
-rw-r--r--arch/powerpc/include/asm/hvsi.h18
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/include/asm/opal.h8
-rw-r--r--arch/powerpc/kvm/Kconfig14
-rw-r--r--arch/powerpc/kvm/book3s_hv.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c10
-rw-r--r--arch/powerpc/platforms/powernv/opal.c14
-rw-r--r--arch/powerpc/platforms/pseries/hvconsole.c4
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c4
-rw-r--r--arch/riscv/Kconfig86
-rw-r--r--arch/riscv/Kconfig.errata1
-rw-r--r--arch/riscv/Makefile8
-rw-r--r--arch/riscv/boot/dts/sophgo/sg2042.dtsi80
-rw-r--r--arch/riscv/configs/defconfig1
-rw-r--r--arch/riscv/configs/rv32_defconfig139
-rw-r--r--arch/riscv/errata/thead/errata.c69
-rw-r--r--arch/riscv/include/asm/arch_hweight.h78
-rw-r--r--arch/riscv/include/asm/archrandom.h72
-rw-r--r--arch/riscv/include/asm/asm-extable.h15
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h27
-rw-r--r--arch/riscv/include/asm/bitops.h4
-rw-r--r--arch/riscv/include/asm/cacheflush.h3
-rw-r--r--arch/riscv/include/asm/checksum.h93
-rw-r--r--arch/riscv/include/asm/cpu_ops.h14
-rw-r--r--arch/riscv/include/asm/cpufeature.h6
-rw-r--r--arch/riscv/include/asm/csr.h9
-rw-r--r--arch/riscv/include/asm/entry-common.h17
-rw-r--r--arch/riscv/include/asm/errata_list.h50
-rw-r--r--arch/riscv/include/asm/ftrace.h18
-rw-r--r--arch/riscv/include/asm/hwcap.h38
-rw-r--r--arch/riscv/include/asm/hwprobe.h24
-rw-r--r--arch/riscv/include/asm/kfence.h4
-rw-r--r--arch/riscv/include/asm/kvm_host.h12
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_sbi.h20
-rw-r--r--arch/riscv/include/asm/paravirt.h28
-rw-r--r--arch/riscv/include/asm/paravirt_api_clock.h1
-rw-r--r--arch/riscv/include/asm/pgtable-64.h22
-rw-r--r--arch/riscv/include/asm/pgtable.h35
-rw-r--r--arch/riscv/include/asm/processor.h43
-rw-r--r--arch/riscv/include/asm/sbi.h36
-rw-r--r--arch/riscv/include/asm/sections.h1
-rw-r--r--arch/riscv/include/asm/simd.h64
-rw-r--r--arch/riscv/include/asm/switch_to.h3
-rw-r--r--arch/riscv/include/asm/thread_info.h3
-rw-r--r--arch/riscv/include/asm/tlbbatch.h15
-rw-r--r--arch/riscv/include/asm/tlbflush.h9
-rw-r--r--arch/riscv/include/asm/vector.h90
-rw-r--r--arch/riscv/include/asm/word-at-a-time.h27
-rw-r--r--arch/riscv/include/asm/xip_fixup.h2
-rw-r--r--arch/riscv/include/asm/xor.h68
-rw-r--r--arch/riscv/include/uapi/asm/hwprobe.h32
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h13
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/cpu-hotplug.c19
-rw-r--r--arch/riscv/kernel/cpu_ops.c14
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c19
-rw-r--r--arch/riscv/kernel/cpu_ops_spinwait.c11
-rw-r--r--arch/riscv/kernel/cpufeature.c285
-rw-r--r--arch/riscv/kernel/efi.c2
-rw-r--r--arch/riscv/kernel/entry.S8
-rw-r--r--arch/riscv/kernel/ftrace.c30
-rw-r--r--arch/riscv/kernel/head.S6
-rw-r--r--arch/riscv/kernel/kernel_mode_vector.c247
-rw-r--r--arch/riscv/kernel/mcount-dyn.S200
-rw-r--r--arch/riscv/kernel/mcount.S2
-rw-r--r--arch/riscv/kernel/module.c37
-rw-r--r--arch/riscv/kernel/paravirt.c135
-rw-r--r--arch/riscv/kernel/patch.c11
-rw-r--r--arch/riscv/kernel/pi/cmdline_early.c3
-rw-r--r--arch/riscv/kernel/process.c13
-rw-r--r--arch/riscv/kernel/ptrace.c7
-rw-r--r--arch/riscv/kernel/sbi.c66
-rw-r--r--arch/riscv/kernel/setup.c19
-rw-r--r--arch/riscv/kernel/signal.c9
-rw-r--r--arch/riscv/kernel/smp.c2
-rw-r--r--arch/riscv/kernel/smpboot.c38
-rw-r--r--arch/riscv/kernel/suspend.c44
-rw-r--r--arch/riscv/kernel/sys_hwprobe.c411
-rw-r--r--arch/riscv/kernel/sys_riscv.c285
-rw-r--r--arch/riscv/kernel/time.c3
-rw-r--r--arch/riscv/kernel/traps_misaligned.c6
-rw-r--r--arch/riscv/kernel/vdso/hwprobe.c86
-rw-r--r--arch/riscv/kernel/vector.c53
-rw-r--r--arch/riscv/kernel/vmlinux-xip.lds.S2
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S2
-rw-r--r--arch/riscv/kvm/Kconfig7
-rw-r--r--arch/riscv/kvm/Makefile1
-rw-r--r--arch/riscv/kvm/mmu.c22
-rw-r--r--arch/riscv/kvm/vcpu.c10
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c135
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c142
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi_sta.c208
-rw-r--r--arch/riscv/kvm/vcpu_switch.S32
-rw-r--r--arch/riscv/kvm/vcpu_vector.c16
-rw-r--r--arch/riscv/kvm/vm.c1
-rw-r--r--arch/riscv/lib/Makefile6
-rw-r--r--arch/riscv/lib/clear_page.S2
-rw-r--r--arch/riscv/lib/csum.c328
-rw-r--r--arch/riscv/lib/riscv_v_helpers.c45
-rw-r--r--arch/riscv/lib/tishift.S2
-rw-r--r--arch/riscv/lib/uaccess.S12
-rw-r--r--arch/riscv/lib/uaccess_vector.S53
-rw-r--r--arch/riscv/lib/xor.S81
-rw-r--r--arch/riscv/mm/Makefile3
-rw-r--r--arch/riscv/mm/dma-noncoherent.c2
-rw-r--r--arch/riscv/mm/extable.c31
-rw-r--r--arch/riscv/mm/fault.c16
-rw-r--r--arch/riscv/mm/hugetlbpage.c12
-rw-r--r--arch/riscv/mm/init.c33
-rw-r--r--arch/riscv/mm/kasan_init.c53
-rw-r--r--arch/riscv/mm/pageattr.c55
-rw-r--r--arch/riscv/mm/pgtable.c51
-rw-r--r--arch/riscv/mm/tlbflush.c74
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c5
-rw-r--r--arch/s390/Kconfig7
-rw-r--r--arch/s390/configs/debug_defconfig4
-rw-r--r--arch/s390/configs/defconfig4
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/include/asm/facility.h6
-rw-r--r--arch/s390/include/asm/kvm_host.h2
-rw-r--r--arch/s390/include/asm/pci_io.h32
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/facility.c21
-rw-r--r--arch/s390/kernel/fpu.c1
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c80
-rw-r--r--arch/s390/kernel/perf_pai_ext.c79
-rw-r--r--arch/s390/kernel/ptrace.c1
-rw-r--r--arch/s390/kvm/Kconfig5
-rw-r--r--arch/s390/kvm/guestdbg.c4
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/vsie.c19
-rw-r--r--arch/s390/mm/fault.c4
-rw-r--r--arch/s390/pci/pci_mmio.c12
-rw-r--r--arch/sh/boards/mach-ecovec24/setup.c2
-rw-r--r--arch/sh/configs/sdk7786_defconfig3
-rw-r--r--arch/sh/include/asm/cacheflush.h1
-rw-r--r--arch/sh/kernel/vsyscall/Makefile5
-rw-r--r--arch/sparc/include/asm/cacheflush_32.h1
-rw-r--r--arch/sparc/include/asm/cacheflush_64.h1
-rw-r--r--arch/sparc/kernel/pci_sabre.c9
-rw-r--r--arch/sparc/kernel/pci_schizo.c13
-rw-r--r--arch/sparc/vdso/Makefile18
-rw-r--r--arch/um/Makefile-skas5
-rw-r--r--arch/um/drivers/chan.h2
-rw-r--r--arch/um/drivers/chan_kern.c9
-rw-r--r--arch/um/drivers/chan_user.c46
-rw-r--r--arch/um/drivers/chan_user.h9
-rw-r--r--arch/um/drivers/line.c15
-rw-r--r--arch/um/drivers/line.h6
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/drivers/null.c2
-rw-r--r--arch/um/drivers/virt-pci.c2
-rw-r--r--arch/um/include/asm/mmu.h1
-rw-r--r--arch/um/include/asm/processor-generic.h1
-rw-r--r--arch/um/include/shared/kern_util.h5
-rw-r--r--arch/um/include/shared/os.h3
-rw-r--r--arch/um/include/shared/ptrace_user.h41
-rw-r--r--arch/um/include/shared/registers.h2
-rw-r--r--arch/um/kernel/process.c14
-rw-r--r--arch/um/kernel/ptrace.c2
-rw-r--r--arch/um/kernel/signal.c12
-rw-r--r--arch/um/kernel/skas/uaccess.c4
-rw-r--r--arch/um/kernel/time.c32
-rw-r--r--arch/um/os-Linux/helper.c6
-rw-r--r--arch/um/os-Linux/registers.c20
-rw-r--r--arch/um/os-Linux/skas/process.c117
-rw-r--r--arch/um/os-Linux/start_up.c111
-rw-r--r--arch/um/os-Linux/util.c19
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/coco/tdx/tdx-shared.c6
-rw-r--r--arch/x86/include/asm/cpu.h4
-rw-r--r--arch/x86/include/asm/cpufeatures.h6
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/kmsan.h17
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h3
-rw-r--r--arch/x86/include/asm/kvm-x86-pmu-ops.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h75
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/shared/tdx.h6
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h25
-rw-r--r--arch/x86/include/asm/tdx.h38
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/kernel/alternative.c2
-rw-r--r--arch/x86/kernel/aperture_64.c3
-rw-r--r--arch/x86/kernel/cpu/amd.c28
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/mce/core.c17
-rw-r--r--arch/x86/kernel/early-quirks.c4
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/kvmclock.c12
-rw-r--r--arch/x86/kernel/rtc.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/topology.c33
-rw-r--r--arch/x86/kernel/traps.c4
-rw-r--r--arch/x86/kvm/Kconfig47
-rw-r--r--arch/x86/kvm/Makefile16
-rw-r--r--arch/x86/kvm/cpuid.c33
-rw-r--r--arch/x86/kvm/cpuid.h13
-rw-r--r--arch/x86/kvm/debugfs.c2
-rw-r--r--arch/x86/kvm/emulate.c27
-rw-r--r--arch/x86/kvm/governed_features.h1
-rw-r--r--arch/x86/kvm/hyperv.h85
-rw-r--r--arch/x86/kvm/irq.c2
-rw-r--r--arch/x86/kvm/irq_comm.c9
-rw-r--r--arch/x86/kvm/kvm_emulate.h9
-rw-r--r--arch/x86/kvm/kvm_onhyperv.h20
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/mmu.h8
-rw-r--r--arch/x86/kvm/mmu/mmu.c293
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h3
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c95
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h3
-rw-r--r--arch/x86/kvm/pmu.c140
-rw-r--r--arch/x86/kvm/pmu.h47
-rw-r--r--arch/x86/kvm/reverse_cpuid.h35
-rw-r--r--arch/x86/kvm/svm/hyperv.h9
-rw-r--r--arch/x86/kvm/svm/nested.c49
-rw-r--r--arch/x86/kvm/svm/pmu.c17
-rw-r--r--arch/x86/kvm/svm/sev.c7
-rw-r--r--arch/x86/kvm/svm/svm.c18
-rw-r--r--arch/x86/kvm/svm/svm.h2
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.c10
-rw-r--r--arch/x86/kvm/svm/vmenter.S10
-rw-r--r--arch/x86/kvm/vmx/hyperv.c447
-rw-r--r--arch/x86/kvm/vmx/hyperv.h204
-rw-r--r--arch/x86/kvm/vmx/hyperv_evmcs.c315
-rw-r--r--arch/x86/kvm/vmx/hyperv_evmcs.h166
-rw-r--r--arch/x86/kvm/vmx/nested.c160
-rw-r--r--arch/x86/kvm/vmx/nested.h3
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c22
-rw-r--r--arch/x86/kvm/vmx/sgx.c1
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/vmx/vmx.c86
-rw-r--r--arch/x86/kvm/vmx/vmx.h14
-rw-r--r--arch/x86/kvm/vmx/vmx_onhyperv.c36
-rw-r--r--arch/x86/kvm/vmx/vmx_onhyperv.h125
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h2
-rw-r--r--arch/x86/kvm/x86.c168
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/kvm/xen.c9
-rw-r--r--arch/x86/pci/acpi.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c180
-rw-r--r--arch/x86/pci/mmconfig_32.c2
-rw-r--r--arch/x86/pci/mmconfig_64.c42
-rw-r--r--arch/x86/pci/pcbios.c28
-rw-r--r--arch/x86/um/asm/elf.h4
-rw-r--r--arch/x86/um/asm/processor_64.h3
-rw-r--r--arch/x86/um/os-Linux/Makefile1
-rw-r--r--arch/x86/um/os-Linux/prctl.c12
-rw-r--r--arch/x86/um/ptrace_32.c24
-rw-r--r--arch/x86/um/ptrace_64.c26
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_32.h4
-rw-r--r--arch/x86/um/shared/sysdep/ptrace_user.h12
-rw-r--r--arch/x86/um/shared/sysdep/stub_32.h39
-rw-r--r--arch/x86/um/shared/sysdep/stub_64.h17
-rw-r--r--arch/x86/um/syscalls_64.c62
-rw-r--r--arch/x86/um/tls_64.c2
-rw-r--r--arch/x86/virt/vmx/tdx/Makefile2
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.c1492
-rw-r--r--arch/x86/virt/vmx/tdx/tdx.h121
-rw-r--r--arch/xtensa/Kconfig2
-rw-r--r--arch/xtensa/Makefile6
-rw-r--r--arch/xtensa/include/asm/asmmacro.h2
-rw-r--r--arch/xtensa/include/asm/cacheflush.h6
-rw-r--r--arch/xtensa/lib/pci-auto.c8
-rw-r--r--arch/xtensa/platforms/iss/console.c2
523 files changed, 11630 insertions(+), 4209 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 5ca66aad0d08..a5af0edd3eb8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -301,6 +301,11 @@ config ARCH_HAS_DMA_CLEAR_UNCACHED
config ARCH_HAS_CPU_FINALIZE_INIT
bool
+# The architecture has a per-task state that includes the mm's PASID
+config ARCH_HAS_CPU_PASID
+ bool
+ select IOMMU_MM_DATA
+
config HAVE_ARCH_THREAD_STRUCT_WHITELIST
bool
help
@@ -668,6 +673,7 @@ config SHADOW_CALL_STACK
bool "Shadow Call Stack"
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+ depends on MMU
help
This option enables the compiler's Shadow Call Stack, which
uses a shadow stack to protect function return addresses from
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index fb3025396ac9..cfdf90bc8b3f 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -80,7 +80,7 @@ init_rtc_epoch(void)
static int
alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
- int ret = mc146818_get_time(tm);
+ int ret = mc146818_get_time(tm, 10);
if (ret < 0) {
dev_err_ratelimited(dev, "unable to read current time\n");
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index d6139dbae4ac..feaf89f6936b 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -53,7 +53,7 @@ srmcons_do_receive_chars(struct tty_port *port)
do {
result.as_long = callback_getc(0);
if (result.bits.status < 2) {
- tty_insert_flip_char(port, (char)result.bits.c, 0);
+ tty_insert_flip_char(port, (u8)result.bits.c, 0);
count++;
}
} while((result.bits.status & 1) && (++loops < 10));
@@ -88,30 +88,27 @@ srmcons_receive_chars(struct timer_list *t)
}
/* called with callback_lock held */
-static int
-srmcons_do_write(struct tty_port *port, const char *buf, int count)
+static void
+srmcons_do_write(struct tty_port *port, const u8 *buf, size_t count)
{
- static char str_cr[1] = "\r";
- long c, remaining = count;
+ size_t c;
srmcons_result result;
- char *cur;
- int need_cr;
- for (cur = (char *)buf; remaining > 0; ) {
- need_cr = 0;
+ while (count > 0) {
+ bool need_cr = false;
/*
* Break it up into reasonable size chunks to allow a chance
* for input to get in
*/
- for (c = 0; c < min_t(long, 128L, remaining) && !need_cr; c++)
- if (cur[c] == '\n')
- need_cr = 1;
+ for (c = 0; c < min_t(size_t, 128U, count) && !need_cr; c++)
+ if (buf[c] == '\n')
+ need_cr = true;
while (c > 0) {
- result.as_long = callback_puts(0, cur, c);
+ result.as_long = callback_puts(0, buf, c);
c -= result.bits.c;
- remaining -= result.bits.c;
- cur += result.bits.c;
+ count -= result.bits.c;
+ buf += result.bits.c;
/*
* Check for pending input iff a tty port was provided
@@ -121,12 +118,11 @@ srmcons_do_write(struct tty_port *port, const char *buf, int count)
}
while (need_cr) {
- result.as_long = callback_puts(0, str_cr, 1);
+ result.as_long = callback_puts(0, "\r", 1);
if (result.bits.c > 0)
- need_cr = 0;
+ need_cr = false;
}
}
- return count;
}
static ssize_t
@@ -135,7 +131,7 @@ srmcons_write(struct tty_struct *tty, const u8 *buf, size_t count)
unsigned long flags;
spin_lock_irqsave(&srmcons_callback_lock, flags);
- srmcons_do_write(tty->port, (const char *) buf, count);
+ srmcons_do_write(tty->port, buf, count);
spin_unlock_irqrestore(&srmcons_callback_lock, flags);
return count;
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 563af3e75f01..329c94cd45d8 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -40,6 +40,7 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz);
/* TBD: optimize this */
#define flush_cache_vmap(start, end) flush_cache_all()
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) flush_cache_all()
#define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 2a7fbbb83b70..197707bc7658 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -91,7 +91,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
* Plug in direct dma map ops.
*/
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
/*
* IOC hardware snoops all DMA traffic keeping the caches consistent
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b67d05468251..0af6709570d1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,7 @@ config ARM
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+ select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts
index e899de681f47..5be0e8fd2633 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts
@@ -45,8 +45,8 @@
num-chipselects = <1>;
cs-gpios = <&gpio0 ASPEED_GPIO(Z, 0) GPIO_ACTIVE_LOW>;
- tpmdev@0 {
- compatible = "tcg,tpm_tis-spi";
+ tpm@0 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
spi-max-frequency = <33000000>;
reg = <0>;
};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts
index a677c827e758..5a8169bbda87 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts
@@ -80,8 +80,8 @@
gpio-miso = <&gpio ASPEED_GPIO(R, 5) GPIO_ACTIVE_HIGH>;
num-chipselects = <1>;
- tpmdev@0 {
- compatible = "tcg,tpm_tis-spi";
+ tpm@0 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
spi-max-frequency = <33000000>;
reg = <0>;
};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts
index 3f6010ef2b86..213023bc5aec 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts
@@ -456,7 +456,7 @@
status = "okay";
tpm: tpm@2e {
- compatible = "tcg,tpm-tis-i2c";
+ compatible = "nuvoton,npct75x", "tcg,tpm-tis-i2c";
reg = <0x2e>;
};
};
diff --git a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi
index 31590d3186a2..00e5887c926f 100644
--- a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi
+++ b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi
@@ -35,8 +35,8 @@
gpio-mosi = <&gpio0 ASPEED_GPIO(X, 4) GPIO_ACTIVE_HIGH>;
gpio-miso = <&gpio0 ASPEED_GPIO(X, 5) GPIO_ACTIVE_HIGH>;
- tpmdev@0 {
- compatible = "tcg,tpm_tis-spi";
+ tpm@0 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
spi-max-frequency = <33000000>;
reg = <0>;
};
diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi
index 98817a6675b9..d233a191c139 100644
--- a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcm2835-rpi.dtsi"
+#include <dt-bindings/power/raspberrypi-power.h>
#include <dt-bindings/reset/raspberrypi,firmware-reset.h>
/ {
@@ -76,3 +77,7 @@
&vchiq {
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
};
+
+&xhci {
+ power-domains = <&power RPI_POWER_DOMAIN_USB>;
+};
diff --git a/arch/arm/boot/dts/broadcom/bcm2711.dtsi b/arch/arm/boot/dts/broadcom/bcm2711.dtsi
index 4a379a14966d..22c7f1561344 100644
--- a/arch/arm/boot/dts/broadcom/bcm2711.dtsi
+++ b/arch/arm/boot/dts/broadcom/bcm2711.dtsi
@@ -604,6 +604,20 @@
};
};
+ xhci: usb@7e9c0000 {
+ compatible = "brcm,bcm2711-xhci", "brcm,xhci-brcm-v2";
+ reg = <0x0 0x7e9c0000 0x100000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_HIGH>;
+ /* DWC2 and this IP block share the same USB PHY,
+ * enabling both at the same time results in lockups.
+ * So keep this node disabled and let the bootloader
+ * decide which interface should be enabled.
+ */
+ status = "disabled";
+ };
+
v3d: gpu@7ec00000 {
compatible = "brcm,2711-v3d";
reg = <0x0 0x7ec00000 0x4000>,
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi
index 44cc4ff1d0df..d12fb44aeb14 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi
@@ -116,7 +116,7 @@
tpm_tis: tpm@1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_tpm>;
- compatible = "tcg,tpm_tis-spi";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
reg = <1>;
spi-max-frequency = <20000000>;
interrupt-parent = <&gpio5>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts
index 3a723843d562..9984b343cdf0 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts
@@ -130,7 +130,7 @@
* TCG specification - Section 6.4.1 Clocking:
* TPM shall support a SPI clock frequency range of 10-24 MHz.
*/
- st33htph: tpm-tis@0 {
+ st33htph: tpm@0 {
compatible = "st,st33htpm-spi", "tcg,tpm_tis-spi";
reg = <0>;
spi-max-frequency = <24000000>;
diff --git a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi
index d7954ff466b4..e5254e32aa8f 100644
--- a/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi
+++ b/arch/arm/boot/dts/samsung/exynos4212-tab3.dtsi
@@ -434,6 +434,7 @@
};
&fimd {
+ samsung,invert-vclk;
status = "okay";
};
diff --git a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi
index b8730aa52ce6..a59331aa58e5 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi
@@ -217,7 +217,7 @@
pinctrl-names = "default";
pinctrl-0 = <&spi1_pins>;
- tpm_spi_tis@0 {
+ tpm@0 {
compatible = "tcg,tpm_tis-spi";
reg = <0>;
spi-max-frequency = <500000>;
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 70480dd9e96d..6d0c9f7268ba 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -68,6 +68,8 @@ struct locomo {
#endif
};
+static const struct bus_type locomo_bus_type;
+
struct locomo_dev_info {
unsigned long offset;
unsigned long length;
@@ -842,7 +844,7 @@ static void locomo_bus_remove(struct device *dev)
drv->remove(ldev);
}
-struct bus_type locomo_bus_type = {
+static const struct bus_type locomo_bus_type = {
.name = "locomo-bus",
.match = locomo_match,
.probe = locomo_bus_probe,
diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
index feb38a94c1a7..43bc1255a5db 100644
--- a/arch/arm/configs/mxs_defconfig
+++ b/arch/arm/configs/mxs_defconfig
@@ -138,7 +138,8 @@ CONFIG_PWM_MXS=y
CONFIG_NVMEM_MXS_OCOTP=y
CONFIG_EXT4_FS=y
# CONFIG_DNOTIFY is not set
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_FSCACHE_STATS=y
CONFIG_CACHEFILES=m
CONFIG_VFAT_FS=y
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index f6181f69577f..1075534b0a2e 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -340,6 +340,8 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
dsb(ishst);
}
+#define flush_cache_vmap_early(start, end) do { } while (0)
+
static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
{
if (!cache_is_vipt_nonaliasing())
diff --git a/arch/arm/include/asm/hardware/locomo.h b/arch/arm/include/asm/hardware/locomo.h
index aaaedafef7cc..9fd9ad5d9202 100644
--- a/arch/arm/include/asm/hardware/locomo.h
+++ b/arch/arm/include/asm/hardware/locomo.h
@@ -158,8 +158,6 @@
#define LOCOMO_LPT_TOH(TOH) ((TOH & 0x7) << 4)
#define LOCOMO_LPT_TOL(TOL) ((TOL & 0x7))
-extern struct bus_type locomo_bus_type;
-
#define LOCOMO_DEVID_KEYBOARD 0
#define LOCOMO_DEVID_FRONTLIGHT 1
#define LOCOMO_DEVID_BACKLIGHT 2
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 16b02f44c7d3..d657b84b6bf7 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -151,6 +151,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+#define pgdp_get(pgpd) READ_ONCE(*pgdp)
+
#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))
#define pud_write(pud) pmd_write(__pmd(pud_val(pud)))
diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 59de137c6f53..2a8a9fe46586 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -11,6 +11,7 @@ menuconfig ARCH_DAVINCI
select PM_GENERIC_DOMAINS_OF if PM && OF
select REGMAP_MMIO
select RESET_CONTROLLER
+ select PINCTRL
select PINCTRL_SINGLE
if ARCH_DAVINCI
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index cfd9c933d2f0..b94850b57995 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -34,7 +34,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
if (IS_ENABLED(CONFIG_CPU_V7M)) {
/*
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 5409225b4abc..f68db05eba29 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -859,10 +859,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
int i = 0;
int order_idx = 0;
- if (array_size <= PAGE_SIZE)
- pages = kzalloc(array_size, GFP_KERNEL);
- else
- pages = vzalloc(array_size);
+ pages = kvzalloc(array_size, GFP_KERNEL);
if (!pages)
return NULL;
@@ -1713,7 +1710,7 @@ void arm_iommu_detach_device(struct device *dev)
EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
struct dma_iommu_mapping *mapping;
@@ -1748,7 +1745,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
#else
static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
}
@@ -1757,7 +1754,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { }
#endif /* CONFIG_ARM_DMA_USE_IOMMU */
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
/*
* Due to legacy code that sets the ->dma_coherent flag from a bus
@@ -1776,8 +1773,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
if (dev->dma_ops)
return;
- if (iommu)
- arm_setup_iommu_dma_ops(dev, dma_base, size, iommu, coherent);
+ if (device_iommu_mapped(dev))
+ arm_setup_iommu_dma_ops(dev, dma_base, size, coherent);
xen_setup_dma_ops(dev);
dev->archdata.dma_ops_setup = true;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index fef62e4a9edd..e96fb40b9cc3 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -278,6 +278,35 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+ if (!(flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, addr);
+ if (!vma)
+ goto lock_mmap;
+
+ if (!(vma->vm_flags & vm_flags)) {
+ vma_end_read(vma);
+ goto lock_mmap;
+ }
+ fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
+
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
+ }
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
+ /* Quick path to respond to signals */
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ goto no_context;
+ return 0;
+ }
+lock_mmap:
+
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
@@ -316,6 +345,7 @@ retry:
}
mmap_read_unlock(mm);
+done:
/*
* Handle the "normal" case first - VM_FAULT_MAJOR
diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c
index 24d71b5db62d..111d4f703136 100644
--- a/arch/arm/mm/kasan_init.c
+++ b/arch/arm/mm/kasan_init.c
@@ -28,6 +28,12 @@ static pgd_t tmp_pgd_table[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
+static __init void *kasan_alloc_block_raw(size_t size)
+{
+ return memblock_alloc_try_nid_raw(size, size, __pa(MAX_DMA_ADDRESS),
+ MEMBLOCK_ALLOC_NOLEAKTRACE, NUMA_NO_NODE);
+}
+
static __init void *kasan_alloc_block(size_t size)
{
return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
@@ -50,7 +56,7 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
if (!pte_none(READ_ONCE(*ptep)))
continue;
- p = kasan_alloc_block(PAGE_SIZE);
+ p = kasan_alloc_block_raw(PAGE_SIZE);
if (!p) {
panic("%s failed to allocate shadow page for address 0x%lx\n",
__func__, addr);
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 7e8773a2d99d..b68efe643a12 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -800,6 +800,24 @@ static struct undef_hook neon_support_hook[] = {{
.cpsr_mask = PSR_T_BIT,
.cpsr_val = PSR_T_BIT,
.fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff000800,
+ .instr_val = 0xfc000800,
+ .cpsr_mask = 0,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff000800,
+ .instr_val = 0xfd000800,
+ .cpsr_mask = 0,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
+}, {
+ .instr_mask = 0xff000800,
+ .instr_val = 0xfe000800,
+ .cpsr_mask = 0,
+ .cpsr_val = 0,
+ .fn = vfp_support_entry,
}};
static struct undef_hook vfp_support_hook = {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8f6cf1221b6a..aa7c1d435139 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -133,6 +133,7 @@ config ARM64
select GENERIC_ARCH_TOPOLOGY
select GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_DEVICES
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_IDLE_POLL_SETUP
@@ -1038,8 +1039,12 @@ config ARM64_ERRATUM_2645198
If unsure, say Y.
+config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ bool
+
config ARM64_ERRATUM_2966298
bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load"
+ select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
default y
help
This option adds the workaround for ARM Cortex-A520 erratum 2966298.
@@ -1051,6 +1056,20 @@ config ARM64_ERRATUM_2966298
If unsure, say Y.
+config ARM64_ERRATUM_3117295
+ bool "Cortex-A510: 3117295: workaround for speculatively executed unprivileged load"
+ select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 3117295.
+
+ On an affected Cortex-A510 core, a speculatively executed unprivileged
+ load might leak data from a privileged level via a cache side channel.
+
+ Work around this problem by executing a TLBI before returning to EL0.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/boot/dts/exynos/google/gs101.dtsi b/arch/arm64/boot/dts/exynos/google/gs101.dtsi
index 9747cb3fa03a..d838e3a7af6e 100644
--- a/arch/arm64/boot/dts/exynos/google/gs101.dtsi
+++ b/arch/arm64/boot/dts/exynos/google/gs101.dtsi
@@ -289,7 +289,7 @@
#clock-cells = <1>;
clocks = <&cmu_top CLK_DOUT_CMU_MISC_BUS>,
<&cmu_top CLK_DOUT_CMU_MISC_SSS>;
- clock-names = "dout_cmu_misc_bus", "dout_cmu_misc_sss";
+ clock-names = "bus", "sss";
};
watchdog_cl0: watchdog@10060000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts
index 968f475b9a96..27a902569e2a 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phygate-tauri-l.dts
@@ -120,7 +120,7 @@
};
tpm: tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
interrupt-parent = <&gpio2>;
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
index 3f3f2a2c89cd..752caa38eb03 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw72xx.dtsi
@@ -89,7 +89,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
index 06fed9376996..2aa6c1090fc7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw73xx.dtsi
@@ -109,7 +109,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
index feae77e03835..a08057410bde 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-kit.dts
@@ -234,7 +234,7 @@
status = "okay";
tpm: tpm@0 {
- compatible = "infineon,slb9670";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
reg = <0>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_tpm>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
index cc9d468b43ab..f87fa5a948cc 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts
@@ -137,6 +137,28 @@
};
};
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ dsp_vdev0vring0: vdev0vring0@942f0000 {
+ reg = <0 0x942f0000 0 0x8000>;
+ no-map;
+ };
+
+ dsp_vdev0vring1: vdev0vring1@942f8000 {
+ reg = <0 0x942f8000 0 0x8000>;
+ no-map;
+ };
+
+ dsp_vdev0buffer: vdev0buffer@94300000 {
+ compatible = "shared-dma-pool";
+ reg = <0 0x94300000 0 0x100000>;
+ no-map;
+ };
+ };
};
&flexspi {
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index c24587c895e1..41c79d2ebdd6 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -103,7 +103,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index 628ffba69862..d5c400b355af 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -115,7 +115,7 @@
status = "okay";
tpm@1 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
index 9caf7ca25444..cae586cd45bd 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
@@ -196,7 +196,7 @@
status = "okay";
tpm@0 {
- compatible = "tcg,tpm_tis-spi";
+ compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x0>;
spi-max-frequency = <36000000>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
index 6376417e918c..d8cf1f27c3ec 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
@@ -65,7 +65,7 @@
status = "okay";
tpm@0 {
- compatible = "infineon,slb9670";
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
reg = <0>;
spi-max-frequency = <43000000>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
index 5506de83f61d..1b3396b1cee3 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
@@ -888,7 +888,7 @@
status = "okay";
cs-gpios = <&pio 86 GPIO_ACTIVE_LOW>;
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
spi-max-frequency = <1000000>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
index f2281250ac35..d87aab8d7a79 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
@@ -1402,7 +1402,7 @@
pinctrl-names = "default";
pinctrl-0 = <&spi5_pins>;
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
interrupts-extended = <&pio 171 IRQ_TYPE_EDGE_RISING>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
index bbdcd441c049..3c6079edda19 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
@@ -1294,6 +1294,7 @@
&xhci0 {
status = "okay";
+ rx-fifo-depth = <3072>;
vusb33-supply = <&mt6359_vusb_ldo_reg>;
vbus-supply = <&usb_vbus>;
};
@@ -1301,6 +1302,7 @@
&xhci1 {
status = "okay";
+ rx-fifo-depth = <3072>;
vusb33-supply = <&mt6359_vusb_ldo_reg>;
vbus-supply = <&usb_vbus>;
};
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 7f8327b0dbdb..e423c57ddd41 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -540,9 +540,6 @@
compatible = "qcom,msm8916-bimc";
reg = <0x00400000 0x62000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
- <&rpmcc RPM_SMD_BIMC_A_CLK>;
};
tsens: thermal-sensor@4a9000 {
@@ -575,18 +572,12 @@
compatible = "qcom,msm8916-pcnoc";
reg = <0x00500000 0x11000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_PCNOC_CLK>,
- <&rpmcc RPM_SMD_PCNOC_A_CLK>;
};
snoc: interconnect@580000 {
compatible = "qcom,msm8916-snoc";
reg = <0x00580000 0x14000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
- <&rpmcc RPM_SMD_SNOC_A_CLK>;
};
stm: stm@802000 {
diff --git a/arch/arm64/boot/dts/qcom/msm8939.dtsi b/arch/arm64/boot/dts/qcom/msm8939.dtsi
index 29f6bd9df2eb..82d85ff61045 100644
--- a/arch/arm64/boot/dts/qcom/msm8939.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8939.dtsi
@@ -602,9 +602,6 @@
bimc: interconnect@400000 {
compatible = "qcom,msm8939-bimc";
reg = <0x00400000 0x62000>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
- <&rpmcc RPM_SMD_BIMC_A_CLK>;
#interconnect-cells = <1>;
};
@@ -648,25 +645,16 @@
pcnoc: interconnect@500000 {
compatible = "qcom,msm8939-pcnoc";
reg = <0x00500000 0x11000>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_PCNOC_CLK>,
- <&rpmcc RPM_SMD_PCNOC_A_CLK>;
#interconnect-cells = <1>;
};
snoc: interconnect@580000 {
compatible = "qcom,msm8939-snoc";
reg = <0x00580000 0x14080>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
- <&rpmcc RPM_SMD_SNOC_A_CLK>;
#interconnect-cells = <1>;
snoc_mm: interconnect-snoc {
compatible = "qcom,msm8939-snoc-mm";
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_SYSMMNOC_CLK>,
- <&rpmcc RPM_SMD_SYSMMNOC_A_CLK>;
#interconnect-cells = <1>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 8c6a7efa90c4..8d41ed261adf 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -838,9 +838,6 @@
compatible = "qcom,msm8996-bimc";
reg = <0x00408000 0x5a000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
- <&rpmcc RPM_SMD_BIMC_A_CLK>;
};
tsens0: thermal-sensor@4a9000 {
@@ -891,18 +888,12 @@
compatible = "qcom,msm8996-cnoc";
reg = <0x00500000 0x1000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_CNOC_CLK>,
- <&rpmcc RPM_SMD_CNOC_A_CLK>;
};
snoc: interconnect@524000 {
compatible = "qcom,msm8996-snoc";
reg = <0x00524000 0x1c000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
- <&rpmcc RPM_SMD_SNOC_A_CLK>;
};
a0noc: interconnect@543000 {
@@ -922,19 +913,14 @@
compatible = "qcom,msm8996-a1noc";
reg = <0x00562000 0x5000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_AGGR1_NOC_CLK>,
- <&rpmcc RPM_SMD_AGGR1_NOC_A_CLK>;
};
a2noc: interconnect@583000 {
compatible = "qcom,msm8996-a2noc";
reg = <0x00583000 0x7000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a", "aggre2_ufs_axi", "ufs_axi";
- clocks = <&rpmcc RPM_SMD_AGGR2_NOC_CLK>,
- <&rpmcc RPM_SMD_AGGR2_NOC_A_CLK>,
- <&gcc GCC_AGGRE2_UFS_AXI_CLK>,
+ clock-names = "aggre2_ufs_axi", "ufs_axi";
+ clocks = <&gcc GCC_AGGRE2_UFS_AXI_CLK>,
<&gcc GCC_UFS_AXI_CLK>;
};
@@ -942,19 +928,14 @@
compatible = "qcom,msm8996-mnoc";
reg = <0x005a4000 0x1c000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a", "iface";
- clocks = <&rpmcc RPM_SMD_MMAXI_CLK>,
- <&rpmcc RPM_SMD_MMAXI_A_CLK>,
- <&mmcc AHB_CLK_SRC>;
+ clock-names = "iface";
+ clocks = <&mmcc AHB_CLK_SRC>;
};
pnoc: interconnect@5c0000 {
compatible = "qcom,msm8996-pnoc";
reg = <0x005c0000 0x3000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_PCNOC_CLK>,
- <&rpmcc RPM_SMD_PCNOC_A_CLK>;
};
tcsr_mutex: hwlock@740000 {
@@ -2486,9 +2467,8 @@
"handover",
"stop-ack";
- clocks = <&xo_board>,
- <&rpmcc RPM_SMD_AGGR2_NOC_CLK>;
- clock-names = "xo", "aggre2";
+ clocks = <&xo_board>;
+ clock-names = "xo";
memory-region = <&slpi_mem>;
@@ -2533,10 +2513,15 @@
<&gcc GCC_MSS_GPLL0_DIV_CLK>,
<&gcc GCC_MSS_SNOC_AXI_CLK>,
<&gcc GCC_MSS_MNOC_BIMC_AXI_CLK>,
- <&rpmcc RPM_SMD_PCNOC_CLK>,
<&rpmcc RPM_SMD_QDSS_CLK>;
- clock-names = "iface", "bus", "mem", "xo", "gpll0_mss",
- "snoc_axi", "mnoc_axi", "pnoc", "qdss";
+ clock-names = "iface",
+ "bus",
+ "mem",
+ "xo",
+ "gpll0_mss",
+ "snoc_axi",
+ "mnoc_axi",
+ "qdss";
resets = <&gcc GCC_MSS_RESTART>;
reset-names = "mss_restart";
diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi
index bb591c6bf573..2793cc22d381 100644
--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
@@ -1605,9 +1605,8 @@
px-supply = <&vreg_lvs2a_1p8>;
- clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
- <&rpmcc RPM_SMD_AGGR2_NOC_CLK>;
- clock-names = "xo", "aggre2";
+ clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>;
+ clock-names = "xo";
memory-region = <&slpi_mem>;
diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi
index 6ac64ce9bb68..2f2eeaf2e945 100644
--- a/arch/arm64/boot/dts/qcom/qcs404.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi
@@ -558,9 +558,6 @@
reg = <0x00400000 0x80000>;
compatible = "qcom,qcs404-bimc";
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
- <&rpmcc RPM_SMD_BIMC_A_CLK>;
};
tsens: thermal-sensor@4a9000 {
@@ -601,18 +598,12 @@
reg = <0x00500000 0x15080>;
compatible = "qcom,qcs404-pcnoc";
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_PNOC_CLK>,
- <&rpmcc RPM_SMD_PNOC_A_CLK>;
};
snoc: interconnect@580000 {
reg = <0x00580000 0x23080>;
compatible = "qcom,qcs404-snoc";
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
- <&rpmcc RPM_SMD_SNOC_A_CLK>;
};
remoteproc_cdsp: remoteproc@b00000 {
diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
index 043184557873..cd0db4f31d4a 100644
--- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
+++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
@@ -1454,7 +1454,7 @@
altmodes {
displayport {
- svid = <0xff01>;
+ svid = /bits/ 16 <0xff01>;
vdo = <0x00001c46>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi
index 775700f78e0f..513fe5e76b68 100644
--- a/arch/arm64/boot/dts/qcom/sdm630.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi
@@ -606,9 +606,6 @@
compatible = "qcom,sdm660-bimc";
reg = <0x01008000 0x78000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
- <&rpmcc RPM_SMD_BIMC_A_CLK>;
};
restart@10ac000 {
@@ -620,28 +617,17 @@
compatible = "qcom,sdm660-cnoc";
reg = <0x01500000 0x10000>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_CNOC_CLK>,
- <&rpmcc RPM_SMD_CNOC_A_CLK>;
};
snoc: interconnect@1626000 {
compatible = "qcom,sdm660-snoc";
reg = <0x01626000 0x7090>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a";
- clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
- <&rpmcc RPM_SMD_SNOC_A_CLK>;
};
anoc2_smmu: iommu@16c0000 {
compatible = "qcom,sdm630-smmu-v2", "qcom,smmu-v2";
reg = <0x016c0000 0x40000>;
-
- assigned-clocks = <&rpmcc RPM_SMD_AGGR2_NOC_CLK>;
- assigned-clock-rates = <1000>;
- clocks = <&rpmcc RPM_SMD_AGGR2_NOC_CLK>;
- clock-names = "bus";
#global-interrupts = <2>;
#iommu-cells = <1>;
@@ -686,16 +672,12 @@
compatible = "qcom,sdm660-a2noc";
reg = <0x01704000 0xc100>;
#interconnect-cells = <1>;
- clock-names = "bus",
- "bus_a",
- "ipa",
+ clock-names = "ipa",
"ufs_axi",
"aggre2_ufs_axi",
"aggre2_usb3_axi",
"cfg_noc_usb2_axi";
- clocks = <&rpmcc RPM_SMD_AGGR2_NOC_CLK>,
- <&rpmcc RPM_SMD_AGGR2_NOC_A_CLK>,
- <&rpmcc RPM_SMD_IPA_CLK>,
+ clocks = <&rpmcc RPM_SMD_IPA_CLK>,
<&gcc GCC_UFS_AXI_CLK>,
<&gcc GCC_AGGRE2_UFS_AXI_CLK>,
<&gcc GCC_AGGRE2_USB3_AXI_CLK>,
@@ -706,10 +688,8 @@
compatible = "qcom,sdm660-mnoc";
reg = <0x01745000 0xa010>;
#interconnect-cells = <1>;
- clock-names = "bus", "bus_a", "iface";
- clocks = <&rpmcc RPM_SMD_MMSSNOC_AXI_CLK>,
- <&rpmcc RPM_SMD_MMSSNOC_AXI_CLK_A>,
- <&mmcc AHB_CLK_SRC>;
+ clock-names = "iface";
+ clocks = <&mmcc AHB_CLK_SRC>;
};
tsens: thermal-sensor@10ae000 {
@@ -1186,7 +1166,9 @@
clocks = <&gcc GCC_GPU_CFG_AHB_CLK>,
<&gcc GCC_BIMC_GFX_CLK>,
<&gcc GCC_GPU_BIMC_GFX_CLK>;
- clock-names = "iface", "mem", "mem_iface";
+ clock-names = "iface",
+ "mem",
+ "mem_iface";
#global-interrupts = <2>;
#iommu-cells = <1>;
@@ -1288,20 +1270,16 @@
<&gcc GCC_USB30_MASTER_CLK>,
<&gcc GCC_AGGRE2_USB3_AXI_CLK>,
<&gcc GCC_USB30_SLEEP_CLK>,
- <&gcc GCC_USB30_MOCK_UTMI_CLK>,
- <&rpmcc RPM_SMD_AGGR2_NOC_CLK>;
+ <&gcc GCC_USB30_MOCK_UTMI_CLK>;
clock-names = "cfg_noc",
"core",
"iface",
"sleep",
- "mock_utmi",
- "bus";
+ "mock_utmi";
assigned-clocks = <&gcc GCC_USB30_MOCK_UTMI_CLK>,
- <&gcc GCC_USB30_MASTER_CLK>,
- <&rpmcc RPM_SMD_AGGR2_NOC_CLK>;
- assigned-clock-rates = <19200000>, <120000000>,
- <19200000>;
+ <&gcc GCC_USB30_MASTER_CLK>;
+ assigned-clock-rates = <19200000>, <120000000>;
interrupts = <GIC_SPI 347 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 243 IRQ_TYPE_LEVEL_HIGH>;
@@ -2204,10 +2182,9 @@
clocks = <&mmcc MNOC_AHB_CLK>,
<&mmcc BIMC_SMMU_AHB_CLK>,
- <&rpmcc RPM_SMD_MMSSNOC_AXI_CLK>,
<&mmcc BIMC_SMMU_AXI_CLK>;
clock-names = "iface-mm", "iface-smmu",
- "bus-mm", "bus-smmu";
+ "bus-smmu";
#global-interrupts = <2>;
#iommu-cells = <1>;
@@ -2324,12 +2301,6 @@
compatible = "qcom,sdm660-gnoc";
reg = <0x17900000 0xe000>;
#interconnect-cells = <1>;
- /*
- * This one apparently features no clocks,
- * so let's not mess with the driver needlessly
- */
- clock-names = "bus", "bus_a";
- clocks = <&xo_board>, <&xo_board>;
};
apcs_glb: mailbox@17911000 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
index 0f9cc042d9bf..1cba1d857c96 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts
@@ -70,7 +70,7 @@
&spi0 {
status = "okay";
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
interrupt-parent = <&gpio0>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
index c5e7de60c121..5846a11f0e84 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
@@ -706,7 +706,7 @@ camera: &i2c7 {
&spi2 {
status = "okay";
- cr50@0 {
+ tpm@0 {
compatible = "google,cr50";
reg = <0>;
interrupt-parent = <&gpio1>;
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 7b1975bf4b90..513787e43329 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -760,32 +760,25 @@ alternative_endif
.endm
/*
- * Check whether preempt/bh-disabled asm code should yield as soon as
- * it is able. This is the case if we are currently running in task
- * context, and either a softirq is pending, or the TIF_NEED_RESCHED
- * flag is set and re-enabling preemption a single time would result in
- * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
- * stored negated in the top word of the thread_info::preempt_count
+ * Check whether asm code should yield as soon as it is able. This is
+ * the case if we are currently running in task context, and the
+ * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag
+ * is stored negated in the top word of the thread_info::preempt_count
* field)
*/
- .macro cond_yield, lbl:req, tmp:req, tmp2:req
+ .macro cond_yield, lbl:req, tmp:req, tmp2
+#ifdef CONFIG_PREEMPT_VOLUNTARY
get_current_task \tmp
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
/*
* If we are serving a softirq, there is no point in yielding: the
* softirq will not be preempted no matter what we do, so we should
- * run to completion as quickly as we can.
+ * run to completion as quickly as we can. The preempt_count field will
+ * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will
+ * catch this case too.
*/
- tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
-#ifdef CONFIG_PREEMPTION
- sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
cbz \tmp, \lbl
#endif
- adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
- get_this_cpu_offset \tmp2
- ldr w\tmp, [\tmp, \tmp2]
- cbnz w\tmp, \lbl // yield on pending softirq in task context
-.Lnoyield_\@:
.endm
/*
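The simplified check above boils down to a single zero test on the 64-bit preempt_count word. A rough C analogue (a sketch only, not kernel code; the function name is made up, and the layout is the one described in the comment, with TIF_NEED_RESCHED stored negated in the top word):

/*
 * Task context with preemption enabled and no softirq accounting leaves the
 * low word at zero, and a pending reschedule clears the (negated) flag in
 * the high word, so the whole 64-bit value reads as zero exactly when
 * cond_yield should take the branch.
 */
static inline bool cond_yield_should_branch(u64 tsk_ti_preempt)
{
	return tsk_ti_preempt == 0;
}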
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index f3034099fd95..b1e43f56ee46 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -38,7 +38,6 @@ struct cpuinfo_32bit {
};
struct cpuinfo_arm64 {
- struct cpu cpu;
struct kobject kobj;
u64 reg_ctr;
u64 reg_cntfrq;
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index ae35939f395b..353fe08546cf 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr)
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
}
+static inline bool esr_fsc_is_translation_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
+}
+
+static inline bool esr_fsc_is_permission_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
+}
+
+static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
+{
+ return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
+}
+
const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY__ */
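A minimal usage sketch of the new predicates (hypothetical caller; only the helpers above and the existing ESR_ELx_FSC_* definitions are assumed). Later hunks in this patch make exactly this substitution in the KVM fault paths:

/* Before: callers masked and compared the FSC type field by hand. */
static bool fault_is_permission_old(unsigned long esr)
{
	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
}

/* After: the same condition through the new helper. */
static bool fault_is_permission_new(unsigned long esr)
{
	return esr_fsc_is_permission_fault(esr);
}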
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 50ce8b697ff3..e93548914c36 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -4,6 +4,8 @@
#ifndef __ASSEMBLER__
+#include <linux/cpumask.h>
+
#include <asm-generic/irq.h>
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b85f46a73e21..3c6f8ba1e479 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -108,6 +108,7 @@
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
/* TCR_EL2 Registers bits */
+#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
@@ -122,6 +123,7 @@
TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_DS TCR_EL2_DS
#define VTCR_EL2_RES1 (1U << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
@@ -344,36 +346,47 @@
* Once we get to a point where the two describe the same thing, we'll
* merge the definitions. One day.
*/
-#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
+#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0
#define __HFGRTR_EL2_MASK GENMASK(49, 0)
-#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
+#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK)
-#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
- BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
- GENMASK(26, 25) | BIT(21) | BIT(18) | \
+/*
+ * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any
+ * future additions, define __HFGWTR* macros relative to __HFGRTR* ones.
+ */
+#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+ GENMASK(26, 25) | BIT(21) | BIT(18) | \
GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
-#define __HFGWTR_EL2_MASK GENMASK(49, 0)
-#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50))
-
-#define __HFGITR_EL2_RES0 GENMASK(63, 57)
-#define __HFGITR_EL2_MASK GENMASK(54, 0)
-#define __HFGITR_EL2_nMASK GENMASK(56, 55)
-
-#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \
- GENMASK(21, 20) | BIT(8))
-#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK
-#define __HDFGRTR_EL2_nMASK GENMASK(62, 59)
-
-#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
- BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
- BIT(22) | BIT(9) | BIT(6))
-#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK
-#define __HDFGWTR_EL2_nMASK GENMASK(62, 60)
+#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK)
+#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK)
+#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK)
+
+#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0
+#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0))
+#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK)
+
+#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0
+#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \
+ GENMASK(41, 40) | GENMASK(37, 22) | \
+ GENMASK(19, 9) | GENMASK(7, 0))
+#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK)
+
+#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0
+#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \
+ GENMASK(46, 44) | GENMASK(42, 41) | \
+ GENMASK(37, 35) | GENMASK(33, 31) | \
+ GENMASK(29, 23) | GENMASK(21, 10) | \
+ GENMASK(8, 7) | GENMASK(5, 0))
+#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK)
+
+#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0
+#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0))
+#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK)
/* Similar definitions for HCRX_EL2 */
-#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12))
-#define __HCRX_EL2_MASK (0)
-#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0))
+#define __HCRX_EL2_RES0 HCRX_EL2_RES0
+#define __HCRX_EL2_MASK (BIT(6))
+#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
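These RES0/MASK/nMASK triples are only usable this way if, for each register, the three masks tile all 64 bits with no overlap; the hyp switch code later in this patch adds CHECK_FGT_MASKS() to enforce that at build time. A standalone sketch of the same invariant (the macro name is illustrative, not from the patch):

/* Each register's three masks must cover the 64 bits exactly once. */
#define FGT_MASKS_PARTITION_OK(reg)					\
	((((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)) == 0) &&	\
	 (((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^		\
	   (__ ## reg ## _nMASK)) == ~0ULL))

static_assert(FGT_MASKS_PARTITION_OK(HFGRTR_EL2));
static_assert(FGT_MASKS_PARTITION_OK(HFGWTR_EL2));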
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 78a550537b67..b804fe832184 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -17,6 +17,7 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_nested.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
@@ -54,11 +55,6 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
-static inline bool vcpu_has_feature(const struct kvm_vcpu *vcpu, int feature)
-{
- return test_bit(feature, vcpu->kvm->arch.vcpu_features);
-}
-
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
@@ -248,7 +244,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
- return __is_hyp_ctxt(&vcpu->arch.ctxt);
+ return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt);
}
/*
@@ -404,14 +400,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
+ return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
}
-static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+static inline
+bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
{
- return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+ return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
+}
+
+static inline
+u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
+{
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+
+ BUG_ON(!esr_fsc_is_permission_fault(esr));
+ return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
}
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
@@ -454,12 +461,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
* first), then a permission fault to allow the flags
* to be set.
*/
- switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
- case ESR_ELx_FSC_PERM:
- return true;
- default:
- return false;
- }
+ return kvm_vcpu_trap_is_permission_fault(vcpu);
}
if (kvm_vcpu_trap_is_iabt(vcpu))
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 824f29f04916..21c57b812569 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
+#include <asm/vncr_mapping.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -306,6 +307,7 @@ struct kvm_arch {
* Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
*/
#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
+#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask)
#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
u64 id_regs[KVM_ARM_ID_REG_NUM];
@@ -324,33 +326,33 @@ struct kvm_vcpu_fault_info {
u64 disr_el1; /* Deferred [SError] Status Register */
};
+/*
+ * VNCR() just places the VNCR_capable registers in the enum after
+ * __VNCR_START__, and arranges for their value (after correction) to be
+ * an 8-byte offset from the VNCR base. As we don't require the enum to
+ * be otherwise ordered,
+ * we need the terrible hack below to ensure that we correctly size the
+ * sys_regs array, no matter what.
+ *
+ * The __MAX__ macro has been lifted from Sean Eron Anderson's wonderful
+ * treasure trove of bit hacks:
+ * https://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define __MAX__(x,y) ((x) ^ (((x) ^ (y)) & -((x) < (y))))
+#define VNCR(r) \
+ __before_##r, \
+ r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
+ __after_##r = __MAX__(__before_##r - 1, r)
+
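A small standalone illustration of the branchless max used above (runnable user-space sketch; only the macro itself comes from the patch):

#include <assert.h>

#define __MAX__(x, y)	((x) ^ (((x) ^ (y)) & -((x) < (y))))

int main(void)
{
	/*
	 * (x < y) evaluates to 0 or 1; negating it gives an all-zeroes or
	 * all-ones mask, which selects either x or x ^ (x ^ y) == y.
	 */
	assert(__MAX__(3, 7) == 7);
	assert(__MAX__(7, 3) == 7);
	assert(__MAX__(5, 5) == 5);
	return 0;
}

In the enum below this keeps __after_##r at least as large as every value assigned so far, so NR_SYS_REGS is always big enough; HCR_EL2, for instance, lands at __VNCR_START__ + VNCR_HCR_EL2 / 8, matching byte offset 0x78 in the VNCR page.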
enum vcpu_sysreg {
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
CLIDR_EL1, /* Cache Level ID Register */
CSSELR_EL1, /* Cache Size Selection Register */
- SCTLR_EL1, /* System Control Register */
- ACTLR_EL1, /* Auxiliary Control Register */
- CPACR_EL1, /* Coprocessor Access Control */
- ZCR_EL1, /* SVE Control */
- TTBR0_EL1, /* Translation Table Base Register 0 */
- TTBR1_EL1, /* Translation Table Base Register 1 */
- TCR_EL1, /* Translation Control Register */
- TCR2_EL1, /* Extended Translation Control Register */
- ESR_EL1, /* Exception Syndrome Register */
- AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
- AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
- FAR_EL1, /* Fault Address Register */
- MAIR_EL1, /* Memory Attribute Indirection Register */
- VBAR_EL1, /* Vector Base Address Register */
- CONTEXTIDR_EL1, /* Context ID Register */
TPIDR_EL0, /* Thread ID, User R/W */
TPIDRRO_EL0, /* Thread ID, User R/O */
TPIDR_EL1, /* Thread ID, Privileged */
- AMAIR_EL1, /* Aux Memory Attribute Indirection Register */
CNTKCTL_EL1, /* Timer Control Register (EL1) */
PAR_EL1, /* Physical Address Register */
- MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
OSLSR_EL1, /* OS Lock Status Register */
DISR_EL1, /* Deferred Interrupt Status Register */
@@ -381,26 +383,11 @@ enum vcpu_sysreg {
APGAKEYLO_EL1,
APGAKEYHI_EL1,
- ELR_EL1,
- SP_EL1,
- SPSR_EL1,
-
- CNTVOFF_EL2,
- CNTV_CVAL_EL0,
- CNTV_CTL_EL0,
- CNTP_CVAL_EL0,
- CNTP_CTL_EL0,
-
/* Memory Tagging Extension registers */
RGSR_EL1, /* Random Allocation Tag Seed Register */
GCR_EL1, /* Tag Control Register */
- TFSR_EL1, /* Tag Fault Status Register (EL1) */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
- /* Permission Indirection Extension registers */
- PIR_EL1, /* Permission Indirection Register 1 (EL1) */
- PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */
-
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -408,21 +395,14 @@ enum vcpu_sysreg {
DBGVCR32_EL2, /* Debug Vector Catch Register */
/* EL2 registers */
- VPIDR_EL2, /* Virtualization Processor ID Register */
- VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */
SCTLR_EL2, /* System Control Register (EL2) */
ACTLR_EL2, /* Auxiliary Control Register (EL2) */
- HCR_EL2, /* Hypervisor Configuration Register */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
- HSTR_EL2, /* Hypervisor System Trap Register */
HACR_EL2, /* Hypervisor Auxiliary Control Register */
- HCRX_EL2, /* Extended Hypervisor Configuration Register */
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
TCR_EL2, /* Translation Control Register (EL2) */
- VTTBR_EL2, /* Virtualization Translation Table Base Register */
- VTCR_EL2, /* Virtualization Translation Control Register */
SPSR_EL2, /* EL2 saved program status register */
ELR_EL2, /* EL2 exception link register */
AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
@@ -435,19 +415,62 @@ enum vcpu_sysreg {
VBAR_EL2, /* Vector Base Address Register (EL2) */
RVBAR_EL2, /* Reset Vector Base Address Register */
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
- TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
- HFGRTR_EL2,
- HFGWTR_EL2,
- HFGITR_EL2,
- HDFGRTR_EL2,
- HDFGWTR_EL2,
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
CNTHV_CTL_EL2,
CNTHV_CVAL_EL2,
+ __VNCR_START__, /* Any VNCR-capable reg goes after this point */
+
+ VNCR(SCTLR_EL1),/* System Control Register */
+ VNCR(ACTLR_EL1),/* Auxiliary Control Register */
+ VNCR(CPACR_EL1),/* Coprocessor Access Control */
+ VNCR(ZCR_EL1), /* SVE Control */
+ VNCR(TTBR0_EL1),/* Translation Table Base Register 0 */
+ VNCR(TTBR1_EL1),/* Translation Table Base Register 1 */
+ VNCR(TCR_EL1), /* Translation Control Register */
+ VNCR(TCR2_EL1), /* Extended Translation Control Register */
+ VNCR(ESR_EL1), /* Exception Syndrome Register */
+ VNCR(AFSR0_EL1),/* Auxiliary Fault Status Register 0 */
+ VNCR(AFSR1_EL1),/* Auxiliary Fault Status Register 1 */
+ VNCR(FAR_EL1), /* Fault Address Register */
+ VNCR(MAIR_EL1), /* Memory Attribute Indirection Register */
+ VNCR(VBAR_EL1), /* Vector Base Address Register */
+ VNCR(CONTEXTIDR_EL1), /* Context ID Register */
+ VNCR(AMAIR_EL1),/* Aux Memory Attribute Indirection Register */
+ VNCR(MDSCR_EL1),/* Monitor Debug System Control Register */
+ VNCR(ELR_EL1),
+ VNCR(SP_EL1),
+ VNCR(SPSR_EL1),
+ VNCR(TFSR_EL1), /* Tag Fault Status Register (EL1) */
+ VNCR(VPIDR_EL2),/* Virtualization Processor ID Register */
+ VNCR(VMPIDR_EL2),/* Virtualization Multiprocessor ID Register */
+ VNCR(HCR_EL2), /* Hypervisor Configuration Register */
+ VNCR(HSTR_EL2), /* Hypervisor System Trap Register */
+ VNCR(VTTBR_EL2),/* Virtualization Translation Table Base Register */
+ VNCR(VTCR_EL2), /* Virtualization Translation Control Register */
+ VNCR(TPIDR_EL2),/* EL2 Software Thread ID Register */
+ VNCR(HCRX_EL2), /* Extended Hypervisor Configuration Register */
+
+ /* Permission Indirection Extension registers */
+ VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
+ VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
+
+ VNCR(HFGRTR_EL2),
+ VNCR(HFGWTR_EL2),
+ VNCR(HFGITR_EL2),
+ VNCR(HDFGRTR_EL2),
+ VNCR(HDFGWTR_EL2),
+ VNCR(HAFGRTR_EL2),
+
+ VNCR(CNTVOFF_EL2),
+ VNCR(CNTV_CVAL_EL0),
+ VNCR(CNTV_CTL_EL0),
+ VNCR(CNTP_CVAL_EL0),
+ VNCR(CNTP_CTL_EL0),
+
NR_SYS_REGS /* Nothing after this line! */
};
@@ -464,6 +487,9 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
struct kvm_vcpu *__hyp_running_vcpu;
+
+ /* This pointer has to be 4kB aligned. */
+ u64 *vncr_array;
};
struct kvm_host_data {
@@ -826,8 +852,19 @@ struct kvm_vcpu_arch {
* accessed by a running VCPU. For example, for userspace access or
* for system registers that are never context switched, but only
* emulated.
+ *
+ * Don't bother with VNCR-based accesses in the nVHE code, it has no
+ * business dealing with NV.
*/
-#define __ctxt_sys_reg(c,r) (&(c)->sys_regs[(r)])
+static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
+{
+#if !defined (__KVM_NVHE_HYPERVISOR__)
+ if (unlikely(cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
+ r >= __VNCR_START__ && ctxt->vncr_array))
+ return &ctxt->vncr_array[r - __VNCR_START__];
+#endif
+ return (u64 *)&ctxt->sys_regs[r];
+}
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
@@ -871,6 +908,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break;
+ case SPSR_EL1: *val = read_sysreg_s(SYS_SPSR_EL12); break;
case PAR_EL1: *val = read_sysreg_par(); break;
case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
@@ -915,6 +953,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break;
+ case SPSR_EL1: write_sysreg_s(val, SYS_SPSR_EL12); break;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
@@ -954,8 +993,6 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-
void kvm_arm_halt_guest(struct kvm *kvm);
void kvm_arm_resume_guest(struct kvm *kvm);
@@ -1177,6 +1214,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_vm_has_ran_once(kvm) \
(test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
+static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
+{
+ return test_bit(feature, ka->vcpu_features);
+}
+
+#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 6cec8e9c6c91..4882905357f4 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -2,8 +2,9 @@
#ifndef __ARM64_KVM_NESTED_H
#define __ARM64_KVM_NESTED_H
-#include <asm/kvm_emulate.h>
+#include <linux/bitfield.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
{
@@ -12,12 +13,55 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2));
}
-extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+/* Translation helpers from non-VHE EL2 to EL1 */
+static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
+{
+ return (u64)FIELD_GET(TCR_EL2_PS_MASK, tcr_el2) << TCR_IPS_SHIFT;
+}
+
+static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
+{
+ return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+ ((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
+ tcr_el2_ps_to_tcr_el1_ips(tcr) |
+ (tcr & TCR_EL2_TG0_MASK) |
+ (tcr & TCR_EL2_ORGN0_MASK) |
+ (tcr & TCR_EL2_IRGN0_MASK) |
+ (tcr & TCR_EL2_T0SZ_MASK);
+}
+
+static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
+{
+ u64 cpacr_el1 = 0;
+
+ if (cptr_el2 & CPTR_EL2_TTA)
+ cpacr_el1 |= CPACR_ELx_TTA;
+ if (!(cptr_el2 & CPTR_EL2_TFP))
+ cpacr_el1 |= CPACR_ELx_FPEN;
+ if (!(cptr_el2 & CPTR_EL2_TZ))
+ cpacr_el1 |= CPACR_ELx_ZEN;
-struct sys_reg_params;
-struct sys_reg_desc;
+ return cpacr_el1;
+}
+
+static inline u64 translate_sctlr_el2_to_sctlr_el1(u64 val)
+{
+ /* Only preserve the minimal set of bits we support */
+ val &= (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | SCTLR_ELx_SA |
+ SCTLR_ELx_I | SCTLR_ELx_IESB | SCTLR_ELx_WXN | SCTLR_ELx_EE);
+ val |= SCTLR_EL1_RES1;
+
+ return val;
+}
+
+static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
+{
+ /* Clear the ASID field */
+ return ttbr0 & ~GENMASK_ULL(63, 48);
+}
+
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
-void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
- const struct sys_reg_desc *r);
+int kvm_init_nv_sysregs(struct kvm *kvm);
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 10068500d601..cfdf40f734b1 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -11,7 +11,8 @@
#include <linux/kvm_host.h>
#include <linux/types.h>
-#define KVM_PGTABLE_MAX_LEVELS 4U
+#define KVM_PGTABLE_FIRST_LEVEL -1
+#define KVM_PGTABLE_LAST_LEVEL 3
/*
* The largest supported block sizes for KVM (no 52-bit PA support):
@@ -20,19 +21,29 @@
* - 64K (level 2): 512MB
*/
#ifdef CONFIG_ARM64_4K_PAGES
-#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1U
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 1
#else
-#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U
+#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2
#endif
-#define kvm_lpa2_is_enabled() false
+#define kvm_lpa2_is_enabled() system_supports_lpa2()
+
+static inline u64 kvm_get_parange_max(void)
+{
+ if (kvm_lpa2_is_enabled() ||
+ (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
+ return ID_AA64MMFR0_EL1_PARANGE_52;
+ else
+ return ID_AA64MMFR0_EL1_PARANGE_48;
+}
static inline u64 kvm_get_parange(u64 mmfr0)
{
+ u64 parange_max = kvm_get_parange_max();
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
- if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX)
- parange = ID_AA64MMFR0_EL1_PARANGE_MAX;
+ if (parange > parange_max)
+ parange = parange_max;
return parange;
}
@@ -43,6 +54,8 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48 GENMASK(15, 12)
+#define KVM_PTE_ADDR_MASK_LPA2 GENMASK(49, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
#define KVM_PHYS_INVALID (-1ULL)
@@ -53,21 +66,34 @@ static inline bool kvm_pte_valid(kvm_pte_t pte)
static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
{
- u64 pa = pte & KVM_PTE_ADDR_MASK;
-
- if (PAGE_SHIFT == 16)
- pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ u64 pa;
+
+ if (kvm_lpa2_is_enabled()) {
+ pa = pte & KVM_PTE_ADDR_MASK_LPA2;
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
+ } else {
+ pa = pte & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16)
+ pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+ }
return pa;
}
static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
{
- kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
-
- if (PAGE_SHIFT == 16) {
- pa &= GENMASK(51, 48);
- pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ kvm_pte_t pte;
+
+ if (kvm_lpa2_is_enabled()) {
+ pte = pa & KVM_PTE_ADDR_MASK_LPA2;
+ pa &= GENMASK(51, 50);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
+ } else {
+ pte = pa & KVM_PTE_ADDR_MASK;
+ if (PAGE_SHIFT == 16) {
+ pa &= GENMASK(51, 48);
+ pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+ }
}
return pte;
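A standalone round-trip sketch of the LPA2 packing above (assumes 4K pages; GENMASK/FIELD_* are replaced by plain shifts and the names are illustrative only):

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT	12
/* bits 49:PAGE_SHIFT, the standalone stand-in for KVM_PTE_ADDR_MASK_LPA2 */
#define ADDR_MASK_LPA2	(((1ULL << 50) - 1) & ~((1ULL << PAGE_SHIFT) - 1))

static uint64_t phys_to_pte_lpa2(uint64_t pa)
{
	/* With LPA2, PA bits 51:50 travel in PTE bits 9:8. */
	return (pa & ADDR_MASK_LPA2) | (((pa >> 50) & 0x3) << 8);
}

static uint64_t pte_to_phys_lpa2(uint64_t pte)
{
	return (pte & ADDR_MASK_LPA2) | (((pte >> 8) & 0x3) << 50);
}

int main(void)
{
	uint64_t pa = (3ULL << 50) | 0xabcde000ULL;	/* page-aligned 52-bit PA */

	assert(pte_to_phys_lpa2(phys_to_pte_lpa2(pa)) == pa);
	return 0;
}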
@@ -78,28 +104,28 @@ static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
return __phys_to_pfn(kvm_pte_to_phys(pte));
}
-static inline u64 kvm_granule_shift(u32 level)
+static inline u64 kvm_granule_shift(s8 level)
{
- /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
+ /* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}
-static inline u64 kvm_granule_size(u32 level)
+static inline u64 kvm_granule_size(s8 level)
{
return BIT(kvm_granule_shift(level));
}
-static inline bool kvm_level_supports_block_mapping(u32 level)
+static inline bool kvm_level_supports_block_mapping(s8 level)
{
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}
static inline u32 kvm_supported_block_sizes(void)
{
- u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+ s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
u32 r = 0;
- for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
r |= BIT(kvm_granule_shift(level));
return r;
@@ -144,7 +170,7 @@ struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
void* (*zalloc_pages_exact)(size_t size);
void (*free_pages_exact)(void *addr, size_t size);
- void (*free_unlinked_table)(void *addr, u32 level);
+ void (*free_unlinked_table)(void *addr, s8 level);
void (*get_page)(void *addr);
void (*put_page)(void *addr);
int (*page_count)(void *addr);
@@ -240,7 +266,7 @@ struct kvm_pgtable_visit_ctx {
u64 start;
u64 addr;
u64 end;
- u32 level;
+ s8 level;
enum kvm_pgtable_walk_flags flags;
};
@@ -343,7 +369,7 @@ static inline bool kvm_pgtable_walk_lock_held(void)
*/
struct kvm_pgtable {
u32 ia_bits;
- u32 start_level;
+ s8 start_level;
kvm_pteref_t pgd;
struct kvm_pgtable_mm_ops *mm_ops;
@@ -477,7 +503,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* The page-table is assumed to be unreachable by any hardware walkers prior to
* freeing and therefore no TLB invalidation is performed.
*/
-void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
/**
* kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
@@ -501,7 +527,7 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
* an ERR_PTR(error) on failure.
*/
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
- u64 phys, u32 level,
+ u64 phys, s8 level,
enum kvm_pgtable_prot prot,
void *mc, bool force_pte);
@@ -727,7 +753,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
- kvm_pte_t *ptep, u32 *level);
+ kvm_pte_t *ptep, s8 *level);
/**
* kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index e46250a02017..ad9cfb5c1ff4 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -56,10 +56,11 @@ static inline unsigned long hyp_vm_table_pages(void)
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
{
- unsigned long total = 0, i;
+ unsigned long total = 0;
+ int i;
/* Provision the worst case scenario */
- for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ for (i = KVM_PGTABLE_FIRST_LEVEL; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
total += nr_pages;
}
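A standalone sketch of the worst-case arithmetic above, assuming 4K pages (512 entries per table) and the new level range of -1 through 3 (the function name and the concrete figure are illustrative only):

#include <assert.h>

#define PTRS_PER_PTE	512	/* 4K pages */

static unsigned long hyp_pgtable_max_pages_sketch(unsigned long nr_pages)
{
	unsigned long total = 0;
	int level;

	/* One division per level, -1 through 3, mirroring the loop above. */
	for (level = -1; level <= 3; level++) {
		nr_pages = (nr_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
		total += nr_pages;
	}
	return total;
}

int main(void)
{
	/* Mapping 1M pages (4GiB) costs 2048 + 4 + 1 + 1 + 1 table pages. */
	assert(hyp_pgtable_max_pages_sketch(1UL << 20) == 2055);
	return 0;
}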
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
new file mode 100644
index 000000000000..df2c47c55972
--- /dev/null
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System register offsets in the VNCR page
+ * All offsets are *byte* displacements!
+ */
+
+#ifndef __ARM64_VNCR_MAPPING_H__
+#define __ARM64_VNCR_MAPPING_H__
+
+#define VNCR_VTTBR_EL2 0x020
+#define VNCR_VTCR_EL2 0x040
+#define VNCR_VMPIDR_EL2 0x050
+#define VNCR_CNTVOFF_EL2 0x060
+#define VNCR_HCR_EL2 0x078
+#define VNCR_HSTR_EL2 0x080
+#define VNCR_VPIDR_EL2 0x088
+#define VNCR_TPIDR_EL2 0x090
+#define VNCR_HCRX_EL2 0x0A0
+#define VNCR_VNCR_EL2 0x0B0
+#define VNCR_CPACR_EL1 0x100
+#define VNCR_CONTEXTIDR_EL1 0x108
+#define VNCR_SCTLR_EL1 0x110
+#define VNCR_ACTLR_EL1 0x118
+#define VNCR_TCR_EL1 0x120
+#define VNCR_AFSR0_EL1 0x128
+#define VNCR_AFSR1_EL1 0x130
+#define VNCR_ESR_EL1 0x138
+#define VNCR_MAIR_EL1 0x140
+#define VNCR_AMAIR_EL1 0x148
+#define VNCR_MDSCR_EL1 0x158
+#define VNCR_SPSR_EL1 0x160
+#define VNCR_CNTV_CVAL_EL0 0x168
+#define VNCR_CNTV_CTL_EL0 0x170
+#define VNCR_CNTP_CVAL_EL0 0x178
+#define VNCR_CNTP_CTL_EL0 0x180
+#define VNCR_SCXTNUM_EL1 0x188
+#define VNCR_TFSR_EL1 0x190
+#define VNCR_HFGRTR_EL2 0x1B8
+#define VNCR_HFGWTR_EL2 0x1C0
+#define VNCR_HFGITR_EL2 0x1C8
+#define VNCR_HDFGRTR_EL2 0x1D0
+#define VNCR_HDFGWTR_EL2 0x1D8
+#define VNCR_ZCR_EL1 0x1E0
+#define VNCR_HAFGRTR_EL2 0x1E8
+#define VNCR_TTBR0_EL1 0x200
+#define VNCR_TTBR1_EL1 0x210
+#define VNCR_FAR_EL1 0x220
+#define VNCR_ELR_EL1 0x230
+#define VNCR_SP_EL1 0x240
+#define VNCR_VBAR_EL1 0x250
+#define VNCR_TCR2_EL1 0x270
+#define VNCR_PIRE0_EL1 0x290
+#define VNCR_PIRE0_EL2 0x298
+#define VNCR_PIR_EL1 0x2A0
+#define VNCR_ICH_LR0_EL2 0x400
+#define VNCR_ICH_LR1_EL2 0x408
+#define VNCR_ICH_LR2_EL2 0x410
+#define VNCR_ICH_LR3_EL2 0x418
+#define VNCR_ICH_LR4_EL2 0x420
+#define VNCR_ICH_LR5_EL2 0x428
+#define VNCR_ICH_LR6_EL2 0x430
+#define VNCR_ICH_LR7_EL2 0x438
+#define VNCR_ICH_LR8_EL2 0x440
+#define VNCR_ICH_LR9_EL2 0x448
+#define VNCR_ICH_LR10_EL2 0x450
+#define VNCR_ICH_LR11_EL2 0x458
+#define VNCR_ICH_LR12_EL2 0x460
+#define VNCR_ICH_LR13_EL2 0x468
+#define VNCR_ICH_LR14_EL2 0x470
+#define VNCR_ICH_LR15_EL2 0x478
+#define VNCR_ICH_AP0R0_EL2 0x480
+#define VNCR_ICH_AP0R1_EL2 0x488
+#define VNCR_ICH_AP0R2_EL2 0x490
+#define VNCR_ICH_AP0R3_EL2 0x498
+#define VNCR_ICH_AP1R0_EL2 0x4A0
+#define VNCR_ICH_AP1R1_EL2 0x4A8
+#define VNCR_ICH_AP1R2_EL2 0x4B0
+#define VNCR_ICH_AP1R3_EL2 0x4B8
+#define VNCR_ICH_HCR_EL2 0x4C0
+#define VNCR_ICH_VMCR_EL2 0x4C8
+#define VNCR_VDISR_EL2 0x500
+#define VNCR_PMBLIMITR_EL1 0x800
+#define VNCR_PMBPTR_EL1 0x810
+#define VNCR_PMBSR_EL1 0x820
+#define VNCR_PMSCR_EL1 0x828
+#define VNCR_PMSEVFR_EL1 0x830
+#define VNCR_PMSICR_EL1 0x838
+#define VNCR_PMSIRR_EL1 0x840
+#define VNCR_PMSLATFR_EL1 0x848
+#define VNCR_TRFCR_EL1 0x880
+#define VNCR_MPAM1_EL1 0x900
+#define VNCR_MPAMHCR_EL2 0x930
+#define VNCR_MPAMVPMV_EL2 0x938
+#define VNCR_MPAMVPM0_EL2 0x940
+#define VNCR_MPAMVPM1_EL2 0x948
+#define VNCR_MPAMVPM2_EL2 0x950
+#define VNCR_MPAMVPM3_EL2 0x958
+#define VNCR_MPAMVPM4_EL2 0x960
+#define VNCR_MPAMVPM5_EL2 0x968
+#define VNCR_MPAMVPM6_EL2 0x970
+#define VNCR_MPAMVPM7_EL2 0x978
+
+#endif /* __ARM64_VNCR_MAPPING_H__ */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d95b3d6b471a..e5d03a7039b4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -73,7 +73,13 @@ obj-$(CONFIG_ARM64_MTE) += mte.o
obj-y += vdso-wrap.o
obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o
obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o
-CFLAGS_patch-scs.o += -mbranch-protection=none
+
+# We need to prevent the SCS patching code from patching itself. Using
+# -mbranch-protection=none here to prevent the patchable PAC opcodes from being
+# generated triggers an issue with full LTO on Clang, which stops emitting PAC
+# instructions altogether. So instead, omit the unwind tables used by the
+# patching code, so it will not be able to locate its own PAC instructions.
+CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables
# Force dependency (vdso*-wrap.S includes vdso.so through incbin)
$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5ff1942b04fc..5a7dbbe0ce63 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -117,8 +117,6 @@ int main(void)
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
- DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
- DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
BLANK();
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index e29e0fea63fb..967c7c7a4e7d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -416,6 +416,19 @@ static struct midr_range broken_aarch32_aes[] = {
};
#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+static const struct midr_range erratum_spec_unpriv_load_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_3117295
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2966298
+ /* Cortex-A520 r0p0 to r0p1 */
+ MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+#endif
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -713,12 +726,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
},
#endif
-#ifdef CONFIG_ARM64_ERRATUM_2966298
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
{
- .desc = "ARM erratum 2966298",
- .capability = ARM64_WORKAROUND_2966298,
+ .desc = "ARM errata 2966298, 3117295",
+ .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD,
/* Cortex-A520 r0p0 - r0p1 */
- ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+ ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list),
},
#endif
#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 01a4c1d7fc09..8d1a634a403e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2341,7 +2341,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
- ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, IMP)
+ ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a6030913cd58..7ef0e127b149 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,16 +428,9 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
.if \el == 0
-alternative_if ARM64_WORKAROUND_2966298
- tlbi vale1, xzr
- dsb nsh
-alternative_else_nop_endif
-alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
- ldr lr, [sp, #S_LR]
- add sp, sp, #PT_REGS_SIZE // restore sp
- eret
-alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
+
msr far_el1, x29
ldr_this_cpu x30, this_cpu_vector, x29
@@ -446,16 +439,26 @@ alternative_else_nop_endif
ldr lr, [sp, #S_LR] // restore x30
add sp, sp, #PT_REGS_SIZE // restore sp
br x29
+
+.L_skip_tramp_exit_\@:
#endif
- .else
+ .endif
+
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
+ .if \el == 0
+ /* This must be after the last explicit memory access */
+alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+ tlbi vale1, xzr
+ dsb nsh
+alternative_else_nop_endif
+ .else
/* Ensure any device/NC reads complete */
alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
+ .endif
eret
- .endif
sb
.endm
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 505f389be3e0..a5dc6f764195 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -898,10 +898,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
* allocate SVE now in case it is needed for use in streaming
* mode.
*/
- if (system_supports_sve()) {
- sve_free(task);
- sve_alloc(task, true);
- }
+ sve_free(task);
+ sve_alloc(task, true);
if (free_sme)
sme_free(task);
@@ -1219,8 +1217,10 @@ void fpsimd_release_task(struct task_struct *dead_task)
*/
void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.sme_state && flush) {
- memset(task->thread.sme_state, 0, sme_state_size(task));
+ if (task->thread.sme_state) {
+ if (flush)
+ memset(task->thread.sme_state, 0,
+ sme_state_size(task));
return;
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 09bb7fc7d3c2..dc6cf0e37194 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1108,12 +1108,13 @@ static int za_set(struct task_struct *target,
}
}
- /* Allocate/reinit ZA storage */
- sme_alloc(target, true);
- if (!target->thread.sme_state) {
- ret = -ENOMEM;
- goto out;
- }
+ /*
+ * Only flush the storage if PSTATE.ZA was not already set,
+ * otherwise preserve any existing data.
+ */
+ sme_alloc(target, !thread_za_enabled(&target->thread));
+ if (!target->thread.sme_state)
+ return -ENOMEM;
/* If there is no data then disable ZA */
if (!count) {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 417a8a86b2db..42c690bb2d60 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -402,19 +402,10 @@ static inline bool cpu_can_disable(unsigned int cpu)
return false;
}
-static int __init topology_init(void)
+bool arch_cpu_is_hotpluggable(int num)
{
- int i;
-
- for_each_possible_cpu(i) {
- struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
- cpu->hotpluggable = cpu_can_disable(i);
- register_cpu(cpu, i);
- }
-
- return 0;
+ return cpu_can_disable(num);
}
-subsys_initcall(topology_init);
static void dump_kernel_offset(void)
{
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 83c1e09be42e..6c3c8ca73e7f 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,16 +21,14 @@ if VIRTUALIZATION
menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
+ select KVM_COMMON
select KVM_GENERIC_HARDWARE_ENABLING
- select MMU_NOTIFIER
- select PREEMPT_NOTIFIERS
+ select KVM_GENERIC_MMU_NOTIFIER
select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK
select KVM_VFIO
- select HAVE_KVM_EVENTFD
- select HAVE_KVM_IRQFD
select HAVE_KVM_DIRTY_RING_ACQ_REL
select NEED_KVM_DIRTY_RING_WITH_BITMAP
select HAVE_KVM_MSI
@@ -41,7 +39,6 @@ menuconfig KVM
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
- select INTERVAL_TREE
select XARRAY_MULTI
help
Support hosting virtualized guest machines.
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 13ba691b848f..9dec8c419bf4 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -295,8 +295,7 @@ static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
struct arch_timer_context *ctx;
- ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
- : vcpu_vtimer(vcpu);
+ ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);
return kvm_counter_compute_delta(ctx, val);
}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 4796104c4471..a25265aca432 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -221,7 +221,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = vgic_present;
break;
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -669,6 +668,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
}
+ if (vcpu_has_nv(vcpu)) {
+ ret = kvm_init_nv_sysregs(vcpu->kvm);
+ if (ret)
+ return ret;
+ }
+
ret = kvm_timer_enable(vcpu);
if (ret)
return ret;
@@ -1837,6 +1842,7 @@ static int kvm_init_vector_slots(void)
static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
{
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
+ u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
unsigned long tcr;
/*
@@ -1859,6 +1865,10 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
}
tcr &= ~TCR_T0SZ_MASK;
tcr |= TCR_T0SZ(hyp_va_bits);
+ tcr &= ~TCR_EL2_PS_MASK;
+ tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0));
+ if (kvm_lpa2_is_enabled())
+ tcr |= TCR_EL2_DS;
params->tcr_el2 = tcr;
params->pgd_pa = kvm_mmu_get_httbr();
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 06185216a297..431fd429932d 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -1012,6 +1012,7 @@ enum fgt_group_id {
HDFGRTR_GROUP,
HDFGWTR_GROUP,
HFGITR_GROUP,
+ HAFGRTR_GROUP,
/* Must be last */
__NR_FGT_GROUP_IDS__
@@ -1042,10 +1043,20 @@ enum fg_filter_id {
static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
/* HFGRTR_EL2, HFGWTR_EL2 */
+ SR_FGT(SYS_AMAIR2_EL1, HFGxTR, nAMAIR2_EL1, 0),
+ SR_FGT(SYS_MAIR2_EL1, HFGxTR, nMAIR2_EL1, 0),
+ SR_FGT(SYS_S2POR_EL1, HFGxTR, nS2POR_EL1, 0),
+ SR_FGT(SYS_POR_EL1, HFGxTR, nPOR_EL1, 0),
+ SR_FGT(SYS_POR_EL0, HFGxTR, nPOR_EL0, 0),
SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0),
SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0),
+ SR_FGT(SYS_RCWMASK_EL1, HFGxTR, nRCWMASK_EL1, 0),
SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
+ SR_FGT(SYS_GCSCR_EL1, HFGxTR, nGCS_EL1, 0),
+ SR_FGT(SYS_GCSPR_EL1, HFGxTR, nGCS_EL1, 0),
+ SR_FGT(SYS_GCSCRE0_EL1, HFGxTR, nGCS_EL0, 0),
+ SR_FGT(SYS_GCSPR_EL0, HFGxTR, nGCS_EL0, 0),
SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),
SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1),
SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1),
@@ -1107,6 +1118,11 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1),
SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1),
/* HFGITR_EL2 */
+ SR_FGT(OP_AT_S1E1A, HFGITR, ATS1E1A, 1),
+ SR_FGT(OP_COSP_RCTX, HFGITR, COSPRCTX, 1),
+ SR_FGT(OP_GCSPUSHX, HFGITR, nGCSEPP, 0),
+ SR_FGT(OP_GCSPOPX, HFGITR, nGCSEPP, 0),
+ SR_FGT(OP_GCSPUSHM, HFGITR, nGCSPUSHM_EL1, 0),
SR_FGT(OP_BRB_IALL, HFGITR, nBRBIALL, 0),
SR_FGT(OP_BRB_INJ, HFGITR, nBRBINJ, 0),
SR_FGT(SYS_DC_CVAC, HFGITR, DCCVAC, 1),
@@ -1674,6 +1690,49 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1),
SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1),
SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1),
+ /*
+ * HAFGRTR_EL2
+ */
+ SR_FGT(SYS_AMEVTYPER1_EL0(15), HAFGRTR, AMEVTYPER115_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(14), HAFGRTR, AMEVTYPER114_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(13), HAFGRTR, AMEVTYPER113_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(12), HAFGRTR, AMEVTYPER112_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(11), HAFGRTR, AMEVTYPER111_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(10), HAFGRTR, AMEVTYPER110_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(9), HAFGRTR, AMEVTYPER19_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(8), HAFGRTR, AMEVTYPER18_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(7), HAFGRTR, AMEVTYPER17_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(6), HAFGRTR, AMEVTYPER16_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(5), HAFGRTR, AMEVTYPER15_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(4), HAFGRTR, AMEVTYPER14_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(3), HAFGRTR, AMEVTYPER13_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(2), HAFGRTR, AMEVTYPER12_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(1), HAFGRTR, AMEVTYPER11_EL0, 1),
+ SR_FGT(SYS_AMEVTYPER1_EL0(0), HAFGRTR, AMEVTYPER10_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(15), HAFGRTR, AMEVCNTR115_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(14), HAFGRTR, AMEVCNTR114_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(13), HAFGRTR, AMEVCNTR113_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(12), HAFGRTR, AMEVCNTR112_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(11), HAFGRTR, AMEVCNTR111_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(10), HAFGRTR, AMEVCNTR110_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(9), HAFGRTR, AMEVCNTR19_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(8), HAFGRTR, AMEVCNTR18_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(7), HAFGRTR, AMEVCNTR17_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(6), HAFGRTR, AMEVCNTR16_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(5), HAFGRTR, AMEVCNTR15_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(4), HAFGRTR, AMEVCNTR14_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(3), HAFGRTR, AMEVCNTR13_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(2), HAFGRTR, AMEVCNTR12_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(1), HAFGRTR, AMEVCNTR11_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR1_EL0(0), HAFGRTR, AMEVCNTR10_EL0, 1),
+ SR_FGT(SYS_AMCNTENCLR1_EL0, HAFGRTR, AMCNTEN1, 1),
+ SR_FGT(SYS_AMCNTENSET1_EL0, HAFGRTR, AMCNTEN1, 1),
+ SR_FGT(SYS_AMCNTENCLR0_EL0, HAFGRTR, AMCNTEN0, 1),
+ SR_FGT(SYS_AMCNTENSET0_EL0, HAFGRTR, AMCNTEN0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(3), HAFGRTR, AMEVCNTR03_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(2), HAFGRTR, AMEVCNTR02_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(1), HAFGRTR, AMEVCNTR01_EL0, 1),
+ SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1),
};
static union trap_config get_trap_config(u32 sysreg)
@@ -1894,6 +1953,10 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
break;
+ case HAFGRTR_GROUP:
+ val = sanitised_sys_reg(vcpu, HAFGRTR_EL2);
+ break;
+
case HFGITR_GROUP:
val = sanitised_sys_reg(vcpu, HFGITR_EL2);
switch (tc.fgf) {
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 9ddcfe2c3e57..9e13c1bc2ad5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
+ esr_fsc_is_permission_fault(esr))) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index f99d8af0b9af..a038320cdb08 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -79,6 +79,45 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
clr |= ~hfg & __ ## reg ## _nMASK; \
} while(0)
+#define update_fgt_traps_cs(vcpu, reg, clr, set) \
+ do { \
+ struct kvm_cpu_context *hctxt = \
+ &this_cpu_ptr(&kvm_host_data)->host_ctxt; \
+ u64 c = 0, s = 0; \
+ \
+ ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \
+ compute_clr_set(vcpu, reg, c, s); \
+ s |= set; \
+ c |= clr; \
+ if (c || s) { \
+ u64 val = __ ## reg ## _nMASK; \
+ val |= s; \
+ val &= ~c; \
+ write_sysreg_s(val, SYS_ ## reg); \
+ } \
+ } while(0)
+
+#define update_fgt_traps(vcpu, reg) \
+ update_fgt_traps_cs(vcpu, reg, 0, 0)
+
+/*
+ * Validate the fine grain trap masks.
+ * Check that the masks do not overlap and that all bits are accounted for.
+ */
+#define CHECK_FGT_MASKS(reg) \
+ do { \
+ BUILD_BUG_ON((__ ## reg ## _MASK) & (__ ## reg ## _nMASK)); \
+ BUILD_BUG_ON(~((__ ## reg ## _RES0) ^ (__ ## reg ## _MASK) ^ \
+ (__ ## reg ## _nMASK))); \
+ } while(0)
+
+static inline bool cpu_has_amu(void)
+{
+ u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1);
+
+ return cpuid_feature_extract_unsigned_field(pfr0,
+ ID_AA64PFR0_EL1_AMU_SHIFT);
+}
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
@@ -86,6 +125,14 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
u64 r_val, w_val;
+ CHECK_FGT_MASKS(HFGRTR_EL2);
+ CHECK_FGT_MASKS(HFGWTR_EL2);
+ CHECK_FGT_MASKS(HFGITR_EL2);
+ CHECK_FGT_MASKS(HDFGRTR_EL2);
+ CHECK_FGT_MASKS(HDFGWTR_EL2);
+ CHECK_FGT_MASKS(HAFGRTR_EL2);
+ CHECK_FGT_MASKS(HCRX_EL2);
+
if (!cpus_have_final_cap(ARM64_HAS_FGT))
return;
@@ -110,12 +157,15 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
}
- /* The default is not to trap anything but ACCDATA_EL1 */
- r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+	/* The default is to trap everything not handled or supported in KVM. */
+ tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 |
+ HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1;
+
+ r_val = __HFGRTR_EL2_nMASK & ~tmp;
r_val |= r_set;
r_val &= ~r_clr;
- w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+ w_val = __HFGWTR_EL2_nMASK & ~tmp;
w_val |= w_set;
w_val &= ~w_clr;
@@ -125,34 +175,12 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
return;
- ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
+ update_fgt_traps(vcpu, HFGITR_EL2);
+ update_fgt_traps(vcpu, HDFGRTR_EL2);
+ update_fgt_traps(vcpu, HDFGWTR_EL2);
- r_set = r_clr = 0;
- compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
- r_val = __HFGITR_EL2_nMASK;
- r_val |= r_set;
- r_val &= ~r_clr;
-
- write_sysreg_s(r_val, SYS_HFGITR_EL2);
-
- ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
- ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
-
- r_clr = r_set = w_clr = w_set = 0;
-
- compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
- compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
-
- r_val = __HDFGRTR_EL2_nMASK;
- r_val |= r_set;
- r_val &= ~r_clr;
-
- w_val = __HDFGWTR_EL2_nMASK;
- w_val |= w_set;
- w_val &= ~w_clr;
-
- write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
- write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
+ if (cpu_has_amu())
+ update_fgt_traps(vcpu, HAFGRTR_EL2);
}
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
@@ -171,6 +199,9 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
+
+ if (cpu_has_amu())
+ write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2);
}
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -591,7 +622,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
- valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
+ valid = kvm_vcpu_trap_is_translation_fault(vcpu) &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index e91922daa8ca..51f043649146 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -69,6 +69,8 @@
ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
)
+#define PVM_ID_AA64PFR2_ALLOW 0ULL
+
/*
* Allow for protected VMs:
* - Mixed-endian
@@ -101,6 +103,7 @@
* - Privileged Access Never
* - SError interrupt exceptions from speculative reads
* - Enhanced Translation Synchronization
+ * - Control for cache maintenance permission
*/
#define PVM_ID_AA64MMFR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
@@ -108,7 +111,8 @@
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
- ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_CMOW) \
)
/*
@@ -133,6 +137,8 @@
ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
)
+#define PVM_ID_AA64MMFR3_ALLOW (0ULL)
+
/*
* No support for Scalable Vectors for protected VMs:
* Requires additional support from KVM, e.g., context-switching and
@@ -178,10 +184,18 @@
ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \
)
+/* Restrict pointer authentication to the basic version. */
+#define PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), ID_AA64ISAR1_EL1_APA_PAuth) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), ID_AA64ISAR1_EL1_API_PAuth) \
+ )
+
+#define PVM_ID_AA64ISAR2_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3), ID_AA64ISAR2_EL1_APA3_PAuth) \
+ )
+
#define PVM_ID_AA64ISAR1_ALLOW (\
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \
ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \
@@ -196,8 +210,8 @@
)
#define PVM_ID_AA64ISAR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_ATS1A)| \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \
- ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) | \
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS) \
)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 1cc06e6797bd..2994878d68ea 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -122,11 +122,7 @@ alternative_if ARM64_HAS_CNP
alternative_else_nop_endif
msr ttbr0_el2, x2
- /*
- * Set the PS bits in TCR_EL2.
- */
ldr x0, [x0, #NVHE_INIT_TCR_EL2]
- tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
msr tcr_el2, x0
isb
@@ -292,6 +288,8 @@ alternative_else_nop_endif
mov sp, x0
/* And turn the MMU back on! */
+ dsb nsh
+ isb
set_sctlr_el2 x2
ret x1
SYM_FUNC_END(__pkvm_init_switch_pgd)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 8d0a5834e883..861c76021a25 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -91,7 +91,7 @@ static void host_s2_put_page(void *addr)
hyp_put_page(&host_s2_pool, addr);
}
-static void host_s2_free_unlinked_table(void *addr, u32 level)
+static void host_s2_free_unlinked_table(void *addr, s8 level)
{
kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}
@@ -443,7 +443,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
struct kvm_mem_range cur;
kvm_pte_t pte;
- u32 level;
+ s8 level;
int ret;
hyp_assert_lock_held(&host_mmu.lock);
@@ -462,7 +462,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
cur.start = ALIGN_DOWN(addr, granule);
cur.end = cur.start + granule;
level++;
- } while ((level < KVM_PGTABLE_MAX_LEVELS) &&
+ } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
!(kvm_level_supports_block_mapping(level) &&
range_included(&cur, range)));
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 65a7a186d7b2..b01a3d1078a8 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -260,7 +260,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
*/
dsb(ishst);
- __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
dsb(ish);
isb();
}
@@ -275,7 +275,7 @@ static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
{
struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
- if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1)
+ if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
return -EINVAL;
slot->addr = ctx->addr;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index b29f15418c0a..26dd9a20ad6e 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -136,6 +136,10 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
cptr_set |= CPTR_EL2_TTA;
}
+ /* Trap External Trace */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
+ mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
+
vcpu->arch.mdcr_el2 |= mdcr_set;
vcpu->arch.mdcr_el2 &= ~mdcr_clear;
vcpu->arch.cptr_el2 |= cptr_set;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0d5e0a89ddce..bc58d1b515af 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -181,7 +181,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (!kvm_pte_valid(ctx->old))
return 0;
- if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
+ if (ctx->level != KVM_PGTABLE_LAST_LEVEL)
return -EINVAL;
phys = kvm_pte_to_phys(ctx->old);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 1966fdee740e..c651df904fe3 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -79,7 +79,10 @@ static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx)
static bool kvm_phys_is_valid(u64 phys)
{
- return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
+ u64 parange_max = kvm_get_parange_max();
+ u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max);
+
+ return phys < BIT(shift);
}
static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
@@ -98,7 +101,7 @@ static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx,
return IS_ALIGNED(ctx->addr, granule);
}
-static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
+static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level)
{
u64 shift = kvm_granule_shift(level);
u64 mask = BIT(PAGE_SHIFT - 3) - 1;
@@ -114,7 +117,7 @@ static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
return (addr & mask) >> shift;
}
-static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
+static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level)
{
struct kvm_pgtable pgt = {
.ia_bits = ia_bits,
@@ -124,9 +127,9 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}
-static bool kvm_pte_table(kvm_pte_t pte, u32 level)
+static bool kvm_pte_table(kvm_pte_t pte, s8 level)
{
- if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+ if (level == KVM_PGTABLE_LAST_LEVEL)
return false;
if (!kvm_pte_valid(pte))
@@ -154,11 +157,11 @@ static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops
return pte;
}
-static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
+static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level)
{
kvm_pte_t pte = kvm_phys_to_pte(pa);
- u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
- KVM_PTE_TYPE_BLOCK;
+ u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE :
+ KVM_PTE_TYPE_BLOCK;
pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
@@ -203,11 +206,11 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker,
}
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
- struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
+ struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level);
static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
struct kvm_pgtable_mm_ops *mm_ops,
- kvm_pteref_t pteref, u32 level)
+ kvm_pteref_t pteref, s8 level)
{
enum kvm_pgtable_walk_flags flags = data->walker->flags;
kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
@@ -272,12 +275,13 @@ out:
}
static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
- struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level)
+ struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level)
{
u32 idx;
int ret = 0;
- if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
+ if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL ||
+ level > KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
@@ -340,7 +344,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct leaf_walk_data {
kvm_pte_t pte;
- u32 level;
+ s8 level;
};
static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -355,7 +359,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
}
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
- kvm_pte_t *ptep, u32 *level)
+ kvm_pte_t *ptep, s8 *level)
{
struct leaf_walk_data data;
struct kvm_pgtable_walker walker = {
@@ -408,7 +412,8 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
}
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
- attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
+ if (!kvm_lpa2_is_enabled())
+ attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
@@ -467,7 +472,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
if (hyp_map_walker_try_leaf(ctx, data))
return 0;
- if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
+ if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
@@ -563,14 +568,19 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
struct kvm_pgtable_mm_ops *mm_ops)
{
- u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
+ s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 -
+ ARM64_HW_PGTABLE_LEVELS(va_bits);
+
+ if (start_level < KVM_PGTABLE_FIRST_LEVEL ||
+ start_level > KVM_PGTABLE_LAST_LEVEL)
+ return -EINVAL;
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
if (!pgt->pgd)
return -ENOMEM;
pgt->ia_bits = va_bits;
- pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
+ pgt->start_level = start_level;
pgt->mm_ops = mm_ops;
pgt->mmu = NULL;
pgt->force_pte_cb = NULL;
@@ -624,7 +634,7 @@ struct stage2_map_data {
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
{
u64 vtcr = VTCR_EL2_FLAGS;
- u8 lvls;
+ s8 lvls;
vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
vtcr |= VTCR_EL2_T0SZ(phys_shift);
@@ -635,6 +645,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
lvls = stage2_pgtable_levels(phys_shift);
if (lvls < 2)
lvls = 2;
+
+ /*
+ * When LPA2 is enabled, the HW supports an extra level of translation
+ * (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2
+ * as an addition to SL0 to enable encoding this extra start level.
+ * However, since we always use concatenated page tables for the first level
+ * lookup, we will never need this extra level and therefore do not need
+ * to touch SL2.
+ */
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
#ifdef CONFIG_ARM64_HW_AFDBM
@@ -654,6 +673,9 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
vtcr |= VTCR_EL2_HA;
#endif /* CONFIG_ARM64_HW_AFDBM */
+ if (kvm_lpa2_is_enabled())
+ vtcr |= VTCR_EL2_DS;
+
/* Set the vmid bits */
vtcr |= (get_vmid_bits(mmfr1) == 16) ?
VTCR_EL2_VS_16BIT :
@@ -711,7 +733,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
if (prot & KVM_PGTABLE_PROT_W)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
- attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
+ if (!kvm_lpa2_is_enabled())
+ attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
+
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
*ptep = attr;
@@ -902,7 +926,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
{
u64 phys = stage2_map_walker_phys_addr(ctx, data);
- if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
+ if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL)
return false;
return kvm_block_mapping_supported(ctx, phys);
@@ -981,7 +1005,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (ret != -E2BIG)
return ret;
- if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
+ if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL))
return -EINVAL;
if (!data->memcache)
@@ -1151,7 +1175,7 @@ struct stage2_attr_data {
kvm_pte_t attr_set;
kvm_pte_t attr_clr;
kvm_pte_t pte;
- u32 level;
+ s8 level;
};
static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -1194,7 +1218,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
u64 size, kvm_pte_t attr_set,
kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
- u32 *level, enum kvm_pgtable_walk_flags flags)
+ s8 *level, enum kvm_pgtable_walk_flags flags)
{
int ret;
kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
@@ -1296,7 +1320,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot)
{
int ret;
- u32 level;
+ s8 level;
kvm_pte_t set = 0, clr = 0;
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
@@ -1349,7 +1373,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
}
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
- u64 phys, u32 level,
+ u64 phys, s8 level,
enum kvm_pgtable_prot prot,
void *mc, bool force_pte)
{
@@ -1407,7 +1431,7 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
* fully populated tree up to the PTE entries. Note that @level is
* interpreted as in "level @level entry".
*/
-static int stage2_block_get_nr_page_tables(u32 level)
+static int stage2_block_get_nr_page_tables(s8 level)
{
switch (level) {
case 1:
@@ -1418,7 +1442,7 @@ static int stage2_block_get_nr_page_tables(u32 level)
return 0;
default:
WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
- level >= KVM_PGTABLE_MAX_LEVELS);
+ level > KVM_PGTABLE_LAST_LEVEL);
return -EINVAL;
};
}
@@ -1431,13 +1455,13 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_s2_mmu *mmu;
kvm_pte_t pte = ctx->old, new, *childp;
enum kvm_pgtable_prot prot;
- u32 level = ctx->level;
+ s8 level = ctx->level;
bool force_pte;
int nr_pages;
u64 phys;
/* No huge-pages exist at the last level */
- if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+ if (level == KVM_PGTABLE_LAST_LEVEL)
return 0;
/* We only split valid block mappings */
@@ -1514,7 +1538,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
u64 vtcr = mmu->vtcr;
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
- u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+ s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
@@ -1537,7 +1561,7 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
{
u32 ia_bits = VTCR_EL2_IPA(vtcr);
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
- u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+ s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}
@@ -1573,7 +1597,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
pgt->pgd = NULL;
}
-void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
struct kvm_pgtable_walker walker = {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d87c8fcc4c24..d14504821b79 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -223,12 +223,12 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
{
struct page *page = container_of(head, struct page, rcu_head);
void *pgtable = page_to_virt(page);
- u32 level = page_private(page);
+ s8 level = page_private(page);
kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
}
-static void stage2_free_unlinked_table(void *addr, u32 level)
+static void stage2_free_unlinked_table(void *addr, s8 level)
{
struct page *page = virt_to_page(addr);
@@ -804,13 +804,13 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
struct kvm_pgtable pgt = {
.pgd = (kvm_pteref_t)kvm->mm->pgd,
.ia_bits = vabits_actual,
- .start_level = (KVM_PGTABLE_MAX_LEVELS -
- CONFIG_PGTABLE_LEVELS),
+ .start_level = (KVM_PGTABLE_LAST_LEVEL -
+ CONFIG_PGTABLE_LEVELS + 1),
.mm_ops = &kvm_user_mm_ops,
};
unsigned long flags;
kvm_pte_t pte = 0; /* Keep GCC quiet... */
- u32 level = ~0;
+ s8 level = S8_MAX;
int ret;
/*
@@ -829,7 +829,9 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
* Not seeing an error, but not updating level? Something went
* deeply wrong...
*/
- if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
+ if (WARN_ON(level > KVM_PGTABLE_LAST_LEVEL))
+ return -EFAULT;
+ if (WARN_ON(level < KVM_PGTABLE_FIRST_LEVEL))
return -EFAULT;
/* Oops, the userspace PTs are gone... Replay the fault */
@@ -1374,7 +1376,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
- unsigned long fault_status)
+ bool fault_is_perm)
{
int ret = 0;
bool write_fault, writable, force_pte = false;
@@ -1388,17 +1390,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
- unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
- fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
+ if (fault_is_perm)
+ fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
write_fault = kvm_is_write_fault(vcpu);
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);
- if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
+ if (fault_is_perm && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
@@ -1409,8 +1411,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* only exception to this is when dirty logging is enabled at runtime
* and a write fault needs to collapse a block entry into a table.
*/
- if (fault_status != ESR_ELx_FSC_PERM ||
- (logging_active && write_fault)) {
+ if (!fault_is_perm || (logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache,
kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
if (ret)
@@ -1527,8 +1528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* backed by a THP and thus use block mapping if possible.
*/
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
- if (fault_status == ESR_ELx_FSC_PERM &&
- fault_granule > PAGE_SIZE)
+ if (fault_is_perm && fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@@ -1541,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
}
- if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
+ if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (mte_allowed) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1567,7 +1567,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* permissions only if vma_pagesize equals fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size.
*/
- if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
+ if (fault_is_perm && vma_pagesize == fault_granule)
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@@ -1618,7 +1618,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
*/
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
- unsigned long fault_status;
+ unsigned long esr;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
unsigned long hva;
@@ -1626,12 +1626,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
gfn_t gfn;
int ret, idx;
- fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+ esr = kvm_vcpu_get_esr(vcpu);
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (fault_status == ESR_ELx_FSC_FAULT) {
+ if (esr_fsc_is_translation_fault(esr)) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
@@ -1666,9 +1666,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- if (fault_status != ESR_ELx_FSC_FAULT &&
- fault_status != ESR_ELx_FSC_PERM &&
- fault_status != ESR_ELx_FSC_ACCESS) {
+ if (!esr_fsc_is_translation_fault(esr) &&
+ !esr_fsc_is_permission_fault(esr) &&
+ !esr_fsc_is_access_flag_fault(esr)) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1730,13 +1730,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
- if (fault_status == ESR_ELx_FSC_ACCESS) {
+ if (esr_fsc_is_access_flag_fault(esr)) {
handle_access_fault(vcpu, fault_ipa);
ret = 1;
goto out_unlock;
}
- ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva,
+ esr_fsc_is_permission_fault(esr));
if (ret == 0)
ret = 1;
out:
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 042695a210ce..ba95d044bc98 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -23,13 +23,9 @@
* This list should get updated as new features get added to the NV
* support, and new extensions to the architecture.
*/
-void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static u64 limit_nv_id_reg(u32 id, u64 val)
{
- u32 id = reg_to_encoding(r);
- u64 val, tmp;
-
- val = p->regval;
+ u64 tmp;
switch (id) {
case SYS_ID_AA64ISAR0_EL1:
@@ -158,5 +154,17 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
break;
}
- p->regval = val;
+ return val;
+}
+int kvm_init_nv_sysregs(struct kvm *kvm)
+{
+ mutex_lock(&kvm->arch.config_lock);
+
+ for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++)
+ kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i),
+ kvm->arch.id_regs[i]);
+
+ mutex_unlock(&kvm->arch.config_lock);
+
+ return 0;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5bb4de162cab..68d1d05672bd 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -280,12 +280,11 @@ int __init kvm_set_ipa_limit(void)
parange = cpuid_feature_extract_unsigned_field(mmfr0,
ID_AA64MMFR0_EL1_PARANGE_SHIFT);
/*
- * IPA size beyond 48 bits could not be supported
- * on either 4K or 16K page size. Hence let's cap
- * it to 48 bits, in case it's reported as larger
- * on the system.
+ * IPA size beyond 48 bits for 4K and 16K page sizes is only supported
+ * when LPA2 is available. So if we have LPA2, enable it, else cap to 48
+ * bits, in case it's reported as larger on the system.
*/
- if (PAGE_SIZE != SZ_64K)
+ if (!kvm_lpa2_is_enabled() && PAGE_SIZE != SZ_64K)
parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48);
/*
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index ff45d688bd7d..30253bd19917 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -45,44 +45,170 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val);
-static bool read_from_write_only(struct kvm_vcpu *vcpu,
- struct sys_reg_params *params,
- const struct sys_reg_desc *r)
+static bool bad_trap(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r,
+ const char *msg)
{
- WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n");
+ WARN_ONCE(1, "Unexpected %s\n", msg);
print_sys_reg_instr(params);
kvm_inject_undefined(vcpu);
return false;
}
+static bool read_from_write_only(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params,
+ const struct sys_reg_desc *r)
+{
+ return bad_trap(vcpu, params, r,
+ "sys_reg read to write-only register");
+}
+
static bool write_to_read_only(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r)
{
- WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n");
- print_sys_reg_instr(params);
- kvm_inject_undefined(vcpu);
- return false;
+ return bad_trap(vcpu, params, r,
+ "sys_reg write to read-only register");
+}
+
+#define PURE_EL2_SYSREG(el2) \
+ case el2: { \
+ *el1r = el2; \
+ return true; \
+ }
+
+#define MAPPED_EL2_SYSREG(el2, el1, fn) \
+ case el2: { \
+ *xlate = fn; \
+ *el1r = el1; \
+ return true; \
+ }
+
+static bool get_el2_to_el1_mapping(unsigned int reg,
+ unsigned int *el1r, u64 (**xlate)(u64))
+{
+ switch (reg) {
+ PURE_EL2_SYSREG( VPIDR_EL2 );
+ PURE_EL2_SYSREG( VMPIDR_EL2 );
+ PURE_EL2_SYSREG( ACTLR_EL2 );
+ PURE_EL2_SYSREG( HCR_EL2 );
+ PURE_EL2_SYSREG( MDCR_EL2 );
+ PURE_EL2_SYSREG( HSTR_EL2 );
+ PURE_EL2_SYSREG( HACR_EL2 );
+ PURE_EL2_SYSREG( VTTBR_EL2 );
+ PURE_EL2_SYSREG( VTCR_EL2 );
+ PURE_EL2_SYSREG( RVBAR_EL2 );
+ PURE_EL2_SYSREG( TPIDR_EL2 );
+ PURE_EL2_SYSREG( HPFAR_EL2 );
+ PURE_EL2_SYSREG( CNTHCTL_EL2 );
+ MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
+ translate_sctlr_el2_to_sctlr_el1 );
+ MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1,
+ translate_cptr_el2_to_cpacr_el1 );
+ MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1,
+ translate_ttbr0_el2_to_ttbr0_el1 );
+ MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1, NULL );
+ MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1,
+ translate_tcr_el2_to_tcr_el1 );
+ MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1, NULL );
+ MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1, NULL );
+ MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1, NULL );
+ MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL );
+ MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL );
+ MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
+ MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
+ default:
+ return false;
+ }
}
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
u64 val = 0x8badf00d8badf00d;
+ u64 (*xlate)(u64) = NULL;
+ unsigned int el1r;
+
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+ goto memory_read;
- if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
- __vcpu_read_sys_reg_from_cpu(reg, &val))
+ if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
+ if (!is_hyp_ctxt(vcpu))
+ goto memory_read;
+
+ /*
+ * If this register does not have an EL1 counterpart,
+ * then read the stored EL2 version.
+ */
+ if (reg == el1r)
+ goto memory_read;
+
+ /*
+ * If we have a non-VHE guest and the sysreg
+ * requires translation to be used at EL1, use the
+ * in-memory copy instead.
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
+ goto memory_read;
+
+ /* Get the current version of the EL1 counterpart. */
+ WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val));
return val;
+ }
+ /* EL1 register can't be on the CPU if the guest is in vEL2. */
+ if (unlikely(is_hyp_ctxt(vcpu)))
+ goto memory_read;
+
+ if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+ return val;
+
+memory_read:
return __vcpu_sys_reg(vcpu, reg);
}
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
- if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
- __vcpu_write_sys_reg_to_cpu(val, reg))
+ u64 (*xlate)(u64) = NULL;
+ unsigned int el1r;
+
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+ goto memory_write;
+
+ if (unlikely(get_el2_to_el1_mapping(reg, &el1r, &xlate))) {
+ if (!is_hyp_ctxt(vcpu))
+ goto memory_write;
+
+ /*
+ * Always store a copy of the write to memory to avoid having
+ * to reverse-translate virtual EL2 system registers for a
+ * non-VHE guest hypervisor.
+ */
+ __vcpu_sys_reg(vcpu, reg) = val;
+
+ /* No EL1 counterpart? We're done here. */
+ if (reg == el1r)
+ return;
+
+ if (!vcpu_el2_e2h_is_set(vcpu) && xlate)
+ val = xlate(val);
+
+ /* Redirect this to the EL1 version of the register. */
+ WARN_ON(!__vcpu_write_sys_reg_to_cpu(val, el1r));
+ return;
+ }
+
+ /* EL1 register can't be on the CPU if the guest is in vEL2. */
+ if (unlikely(is_hyp_ctxt(vcpu)))
+ goto memory_write;
+
+ if (__vcpu_write_sys_reg_to_cpu(val, reg))
return;
- __vcpu_sys_reg(vcpu, reg) = val;
+memory_write:
+ __vcpu_sys_reg(vcpu, reg) = val;
}
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
@@ -1505,8 +1631,6 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
return write_to_read_only(vcpu, p, r);
p->regval = read_id_reg(vcpu, r);
- if (vcpu_has_nv(vcpu))
- access_nested_id_reg(vcpu, p, r);
return true;
}
@@ -1885,6 +2009,32 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static bool bad_vncr_trap(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /*
+ * We really shouldn't be here, and this is likely the result
+ * of a misconfigured trap, as this register should target the
+ * VNCR page, and nothing else.
+ */
+ return bad_trap(vcpu, p, r,
+ "trap of VNCR-backed register");
+}
+
+static bool bad_redir_trap(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /*
+ * We really shouldn't be here, and this is likely the result
+ * of a misconfigured trap, as this register should target the
+ * corresponding EL1, and nothing else.
+ */
+ return bad_trap(vcpu, p, r,
+ "trap of EL2 register redirected to EL1");
+}
+
#define EL2_REG(name, acc, rst, v) { \
SYS_DESC(SYS_##name), \
.access = acc, \
@@ -1894,6 +2044,9 @@ static unsigned int el2_visibility(const struct kvm_vcpu *vcpu,
.val = v, \
}
+#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
+#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
+
/*
* EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
* HCR_EL2.E2H==1, and only in the sysreg table for convenience of
@@ -2508,32 +2661,33 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ PMU_SYS_REG(PMCCFILTR_EL0), .access = access_pmu_evtyper,
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
- EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
- EL2_REG(VMPIDR_EL2, access_rw, reset_unknown, 0),
+ EL2_REG_VNCR(VPIDR_EL2, reset_unknown, 0),
+ EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
- EL2_REG(HCR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HCR_EL2, reset_val, 0),
EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
- EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
- EL2_REG(HACR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HFGRTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HFGWTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HFGITR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HACR_EL2, reset_val, 0),
- EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HCRX_EL2, reset_val, 0),
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
- EL2_REG(VTTBR_EL2, access_rw, reset_val, 0),
- EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
+ EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
{ SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
- EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
- EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
- EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
- EL2_REG(ELR_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
+ EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
+ EL2_REG_REDIR(SPSR_EL2, reset_val, 0),
+ EL2_REG_REDIR(ELR_EL2, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
/* AArch32 SPSR_* are RES0 if trapped from a NV guest */
@@ -2549,10 +2703,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
- EL2_REG(ESR_EL2, access_rw, reset_val, 0),
+ EL2_REG_REDIR(ESR_EL2, reset_val, 0),
{ SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
- EL2_REG(FAR_EL2, access_rw, reset_val, 0),
+ EL2_REG_REDIR(FAR_EL2, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
@@ -2565,24 +2719,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
- EL2_REG(CNTVOFF_EL2, access_rw, reset_val, 0),
+ EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
- EL12_REG(SCTLR, access_vm_reg, reset_val, 0x00C50078),
- EL12_REG(CPACR, access_rw, reset_val, 0),
- EL12_REG(TTBR0, access_vm_reg, reset_unknown, 0),
- EL12_REG(TTBR1, access_vm_reg, reset_unknown, 0),
- EL12_REG(TCR, access_vm_reg, reset_val, 0),
- { SYS_DESC(SYS_SPSR_EL12), access_spsr},
- { SYS_DESC(SYS_ELR_EL12), access_elr},
- EL12_REG(AFSR0, access_vm_reg, reset_unknown, 0),
- EL12_REG(AFSR1, access_vm_reg, reset_unknown, 0),
- EL12_REG(ESR, access_vm_reg, reset_unknown, 0),
- EL12_REG(FAR, access_vm_reg, reset_unknown, 0),
- EL12_REG(MAIR, access_vm_reg, reset_unknown, 0),
- EL12_REG(AMAIR, access_vm_reg, reset_amair_el1, 0),
- EL12_REG(VBAR, access_rw, reset_val, 0),
- EL12_REG(CONTEXTIDR, access_vm_reg, reset_val, 0),
EL12_REG(CNTKCTL, access_rw, reset_val, 0),
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 2dad2d095160..e2764d0ffa9f 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -590,7 +590,11 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
unsigned long flags;
raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+
irq = __vgic_its_check_cache(dist, db, devid, eventid);
+ if (irq)
+ vgic_get_irq_kref(irq);
+
raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
return irq;
@@ -769,6 +773,7 @@ int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi)
raw_spin_lock_irqsave(&irq->irq_lock, flags);
irq->pending_latch = true;
vgic_queue_irq_unlock(kvm, irq, flags);
+ vgic_put_irq(kvm, irq);
return 0;
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index a764b0ab8bf9..c15ee1df036a 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -357,31 +357,13 @@ static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
- unsigned long flags;
-
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- if (test_bit(i, &val)) {
- /*
- * pending_latch is set irrespective of irq type
- * (level or edge) to avoid dependency that VM should
- * restore irq config before pending info.
- */
- irq->pending_latch = true;
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
- } else {
- irq->pending_latch = false;
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
+ int ret;
- vgic_put_irq(vcpu->kvm, irq);
- }
+ ret = vgic_uaccess_write_spending(vcpu, addr, len, val);
+ if (ret)
+ return ret;
- return 0;
+ return vgic_uaccess_write_cpending(vcpu, addr, len, ~val);
}
/* We want to avoid outer shareable. */
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index ff558c05e990..cf76523a2194 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -301,9 +301,8 @@ static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
}
-void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
+static void __set_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
+ unsigned long val, bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
@@ -312,14 +311,22 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- /* GICD_ISPENDR0 SGI bits are WI */
- if (is_vgic_v2_sgi(vcpu, irq)) {
+ /* GICD_ISPENDR0 SGI bits are WI when written from the guest. */
+ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) {
vgic_put_irq(vcpu->kvm, irq);
continue;
}
raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ /*
+ * GICv2 SGIs are terribly broken. We can't restore
+ * the source of the interrupt, so just pick the vcpu
+ * itself as the source...
+ */
+ if (is_vgic_v2_sgi(vcpu, irq))
+ irq->source |= BIT(vcpu->vcpu_id);
+
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
/* HW SGI? Ask the GIC to inject it */
int err;
@@ -335,7 +342,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
}
irq->pending_latch = true;
- if (irq->hw)
+ if (irq->hw && !is_user)
vgic_irq_set_phys_active(irq, true);
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
@@ -343,33 +350,18 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
}
}
+void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __set_pending(vcpu, addr, len, val, false);
+}
+
int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
- unsigned long flags;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = true;
-
- /*
- * GICv2 SGIs are terribly broken. We can't restore
- * the source of the interrupt, so just pick the vcpu
- * itself as the source...
- */
- if (is_vgic_v2_sgi(vcpu, irq))
- irq->source |= BIT(vcpu->vcpu_id);
-
- vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
+ __set_pending(vcpu, addr, len, val, true);
return 0;
}
@@ -394,9 +386,9 @@ static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
vgic_irq_set_phys_active(irq, false);
}
-void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len,
- unsigned long val)
+static void __clear_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val, bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
int i;
@@ -405,14 +397,22 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- /* GICD_ICPENDR0 SGI bits are WI */
- if (is_vgic_v2_sgi(vcpu, irq)) {
+ /* GICD_ICPENDR0 SGI bits are WI when written from the guest. */
+ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) {
vgic_put_irq(vcpu->kvm, irq);
continue;
}
raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ /*
+ * More fun with GICv2 SGIs! If we're clearing one of them
+ * from userspace, which source vcpu to clear? Let's not
+ * even think of it, and blow the whole set.
+ */
+ if (is_vgic_v2_sgi(vcpu, irq))
+ irq->source = 0;
+
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
/* HW SGI? Ask the GIC to clear its pending bit */
int err;
@@ -427,7 +427,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
continue;
}
- if (irq->hw)
+ if (irq->hw && !is_user)
vgic_hw_irq_cpending(vcpu, irq);
else
irq->pending_latch = false;
@@ -437,33 +437,18 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
}
}
+void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ __clear_pending(vcpu, addr, len, val, false);
+}
+
int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- int i;
- unsigned long flags;
-
- for_each_set_bit(i, &val, len * 8) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
-
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- /*
- * More fun with GICv2 SGIs! If we're clearing one of them
- * from userspace, which source vcpu to clear? Let's not
- * even think of it, and blow the whole set.
- */
- if (is_vgic_v2_sgi(vcpu, irq))
- irq->source = 0;
-
- irq->pending_latch = false;
-
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
+ __clear_pending(vcpu, addr, len, val, true);
return 0;
}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3cb101e8cb29..61886e43e3a1 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -47,7 +47,7 @@ void arch_teardown_dma_ops(struct device *dev)
#endif
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
int cls = cache_line_size_of_cpu();
@@ -58,7 +58,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
ARCH_DMA_MINALIGN, cls);
dev->dma_coherent = coherent;
- if (iommu)
+ if (device_iommu_mapped(dev))
iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
xen_setup_dma_ops(dev);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 1e07d74d7a6c..b912b1409fc0 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -84,7 +84,6 @@ WORKAROUND_2077057
WORKAROUND_2457168
WORKAROUND_2645198
WORKAROUND_2658417
-WORKAROUND_2966298
WORKAROUND_AMPERE_AC03_CPU_38
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
@@ -100,3 +99,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP
WORKAROUND_QCOM_FALKOR_E1003
WORKAROUND_REPEAT_TLBI
WORKAROUND_SPECULATIVE_AT
+WORKAROUND_SPECULATIVE_UNPRIV_LOAD
diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h
index 908d8b0bc4fd..d011a81575d2 100644
--- a/arch/csky/abiv1/inc/abi/cacheflush.h
+++ b/arch/csky/abiv1/inc/abi/cacheflush.h
@@ -43,6 +43,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
*/
extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
#define flush_cache_vmap(start, end) cache_wbinv_all()
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) cache_wbinv_all()
#define flush_icache_range(start, end) cache_wbinv_range(start, end)
diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h
index 40be16907267..6513ac5d2578 100644
--- a/arch/csky/abiv2/inc/abi/cacheflush.h
+++ b/arch/csky/abiv2/inc/abi/cacheflush.h
@@ -41,6 +41,7 @@ void flush_icache_mm_range(struct mm_struct *mm,
void flush_icache_deferred(struct mm_struct *mm);
#define flush_cache_vmap(start, end) do { } while (0)
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)
#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/csky/configs/defconfig b/arch/csky/configs/defconfig
index af722e4dfb47..ff559e5162aa 100644
--- a/arch/csky/configs/defconfig
+++ b/arch/csky/configs/defconfig
@@ -34,7 +34,8 @@ CONFIG_GENERIC_PHY=y
CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_QUOTA=y
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_FSCACHE_STATS=y
CONFIG_CACHEFILES=m
CONFIG_MSDOS_FS=y
diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild
index beb8499dd8ed..bfa21465d83a 100644
--- a/arch/loongarch/Kbuild
+++ b/arch/loongarch/Kbuild
@@ -4,6 +4,7 @@ obj-y += net/
obj-y += vdso/
obj-$(CONFIG_KVM) += kvm/
+obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
# for cleaning
subdir- += boot
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index ee123820a476..10959e6c3583 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -5,6 +5,7 @@ config LOONGARCH
select ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_MCFG if ACPI
+ select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU
select ACPI_PPTT if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_BINFMT_ELF_STATE
@@ -71,6 +72,7 @@ config LOONGARCH
select GENERIC_CLOCKEVENTS
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_DEVICES
select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY
select GENERIC_IOREMAP if !ARCH_IOREMAP
@@ -140,6 +142,7 @@ config LOONGARCH
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RETHOOK
select HAVE_RSEQ
+ select HAVE_RUST
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_SETUP_PER_CPU_AREA if NUMA
@@ -374,6 +377,24 @@ config CMDLINE_FORCE
endchoice
+config BUILTIN_DTB
+ bool "Enable built-in dtb in kernel"
+ depends on OF
+ help
+ Some existing systems do not provide a canonical device tree to
+ the kernel at boot time. Let's provide a device tree table in the
+ kernel, keyed by the dts filename, containing the relevant DTBs.
+
+ Built-in DTBs are generic enough and can be used as references.
+
+config BUILTIN_DTB_NAME
+ string "Source file for built-in dtb"
+ depends on BUILTIN_DTB
+ help
+ Base name (without suffix, relative to arch/loongarch/boot/dts/)
+ for the DTS file that will be used to produce the DTB linked into
+ the kernel.
+
config DMI
bool "Enable DMI scanning"
select DMI_SCAN_MACHINE_NON_EFI_FALLBACK
@@ -575,6 +596,9 @@ config ARCH_SELECTS_CRASH_DUMP
depends on CRASH_DUMP
select RELOCATABLE
+config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+ def_bool CRASH_CORE
+
config RELOCATABLE
bool "Relocatable kernel"
help
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 4ba8d67ddb09..983aa2b1629a 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -6,6 +6,7 @@
boot := arch/loongarch/boot
KBUILD_DEFCONFIG := loongson3_defconfig
+KBUILD_DTBS := dtbs
image-name-y := vmlinux
image-name-$(CONFIG_EFI_ZBOOT) := vmlinuz
@@ -81,8 +82,11 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs
KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
endif
+KBUILD_RUSTFLAGS_MODULE += -Crelocation-model=pic
+
ifeq ($(CONFIG_RELOCATABLE),y)
KBUILD_CFLAGS_KERNEL += -fPIE
+KBUILD_RUSTFLAGS_KERNEL += -Crelocation-model=pie
LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs)
endif
@@ -141,7 +145,7 @@ endif
vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
-all: $(notdir $(KBUILD_IMAGE))
+all: $(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
vmlinuz.efi: vmlinux.efi
diff --git a/arch/loongarch/boot/dts/Makefile b/arch/loongarch/boot/dts/Makefile
index 5f1f55e911ad..747d0c3f6389 100644
--- a/arch/loongarch/boot/dts/Makefile
+++ b/arch/loongarch/boot/dts/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-dtstree := $(srctree)/$(src)
-dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
+dtb-y = loongson-2k0500-ref.dtb loongson-2k1000-ref.dtb loongson-2k2000-ref.dtb
+
+obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_NAME))
diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
new file mode 100644
index 000000000000..b38071a4d0b0
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include "loongson-2k0500.dtsi"
+
+/ {
+ compatible = "loongson,ls2k0500-ref", "loongson,ls2k0500";
+ model = "Loongson-2K0500 Reference Board";
+
+ aliases {
+ ethernet0 = &gmac0;
+ ethernet1 = &gmac1;
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@200000 {
+ device_type = "memory";
+ reg = <0x0 0x00200000 0x0 0x0ee00000>,
+ <0x0 0x90000000 0x0 0x60000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+ size = <0x0 0x2000000>;
+ linux,cma-default;
+ };
+ };
+};
+
+&gmac0 {
+ status = "okay";
+
+ phy-mode = "rgmii";
+ bus_id = <0x0>;
+};
+
+&gmac1 {
+ status = "okay";
+
+ phy-mode = "rgmii";
+ bus_id = <0x1>;
+};
+
+&i2c0 {
+ status = "okay";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ eeprom@57 {
+ compatible = "atmel,24c16";
+ reg = <0x57>;
+ pagesize = <16>;
+ };
+};
+
+&ehci0 {
+ status = "okay";
+};
+
+&ohci0 {
+ status = "okay";
+};
+
+&sata {
+ status = "okay";
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&rtc0 {
+ status = "okay";
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi
new file mode 100644
index 000000000000..444779c21034
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ compatible = "loongson,la264";
+ device_type = "cpu";
+ reg = <0x0>;
+ clocks = <&cpu_clk>;
+ };
+ };
+
+ cpu_clk: cpu-clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <500000000>;
+ };
+
+ cpuintc: interrupt-controller {
+ compatible = "loongson,cpu-interrupt-controller";
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ };
+
+ bus@10000000 {
+ compatible = "simple-bus";
+ ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>,
+ <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>,
+ <0x0 0x20000000 0x0 0x20000000 0x0 0x10000000>,
+ <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>,
+ <0xfe 0x0 0xfe 0x0 0x0 0x40000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ isa@16400000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x16400000 0x4000>;
+ };
+
+ liointc0: interrupt-controller@1fe11400 {
+ compatible = "loongson,liointc-2.0";
+ reg = <0x0 0x1fe11400 0x0 0x40>,
+ <0x0 0x1fe11040 0x0 0x8>;
+ reg-names = "main", "isr0";
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+ interrupt-names = "int0";
+
+ loongson,parent_int_map = <0xffffffff>, /* int0 */
+ <0x00000000>, /* int1 */
+ <0x00000000>, /* int2 */
+ <0x00000000>; /* int3 */
+ };
+
+ liointc1: interrupt-controller@1fe11440 {
+ compatible = "loongson,liointc-2.0";
+ reg = <0x0 0x1fe11440 0x0 0x40>,
+ <0x0 0x1fe11048 0x0 0x8>;
+ reg-names = "main", "isr0";
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <4>;
+ interrupt-names = "int2";
+
+ loongson,parent_int_map = <0x00000000>, /* int0 */
+ <0x00000000>, /* int1 */
+ <0xffffffff>, /* int2 */
+ <0x00000000>; /* int3 */
+ };
+
+ eiointc: interrupt-controller@1fe11600 {
+ compatible = "loongson,ls2k0500-eiointc";
+ reg = <0x0 0x1fe11600 0x0 0xea00>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <3>;
+ };
+
+ gmac0: ethernet@1f020000 {
+ compatible = "snps,dwmac-3.70a";
+ reg = <0x0 0x1f020000 0x0 0x10000>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ status = "disabled";
+ };
+
+ gmac1: ethernet@1f030000 {
+ compatible = "snps,dwmac-3.70a";
+ reg = <0x0 0x1f030000 0x0 0x10000>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ status = "disabled";
+ };
+
+ sata: sata@1f040000 {
+ compatible = "snps,spear-ahci";
+ reg = <0x0 0x1f040000 0x0 0x10000>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <75>;
+ status = "disabled";
+ };
+
+ ehci0: usb@1f050000 {
+ compatible = "generic-ehci";
+ reg = <0x0 0x1f050000 0x0 0x8000>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <71>;
+ status = "disabled";
+ };
+
+ ohci0: usb@1f058000 {
+ compatible = "generic-ohci";
+ reg = <0x0 0x1f058000 0x0 0x8000>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <72>;
+ status = "disabled";
+ };
+
+ uart0: serial@1ff40800 {
+ compatible = "ns16550a";
+ reg = <0x0 0x1ff40800 0x0 0x10>;
+ clock-frequency = <100000000>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <2>;
+ no-loopback-test;
+ status = "disabled";
+ };
+
+ i2c0: i2c@1ff48000 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1ff48000 0x0 0x0800>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <14>;
+ status = "disabled";
+ };
+
+ i2c@1ff48800 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1ff48800 0x0 0x0800>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <15>;
+ status = "disabled";
+ };
+
+ i2c@1ff49000 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1ff49000 0x0 0x0800>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <16>;
+ status = "disabled";
+ };
+
+ i2c@1ff49800 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1ff49800 0x0 0x0800>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <17>;
+ status = "disabled";
+ };
+
+ i2c@1ff4a000 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1ff4a000 0x0 0x0800>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <18>;
+ status = "disabled";
+ };
+
+ i2c@1ff4a800 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1ff4a800 0x0 0x0800>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <19>;
+ status = "disabled";
+ };
+
+ pmc: power-management@1ff6c000 {
+ compatible = "loongson,ls2k0500-pmc", "syscon";
+ reg = <0x0 0x1ff6c000 0x0 0x58>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <56>;
+ loongson,suspend-address = <0x0 0x1c000500>;
+
+ syscon-reboot {
+ compatible = "syscon-reboot";
+ offset = <0x30>;
+ mask = <0x1>;
+ };
+
+ syscon-poweroff {
+ compatible = "syscon-poweroff";
+ regmap = <&pmc>;
+ offset = <0x14>;
+ mask = <0x3c00>;
+ value = <0x3c00>;
+ };
+ };
+
+ rtc0: rtc@1ff6c100 {
+ compatible = "loongson,ls2k0500-rtc", "loongson,ls7a-rtc";
+ reg = <0x0 0x1ff6c100 0x0 0x100>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <35>;
+ status = "disabled";
+ };
+
+ pcie@1a000000 {
+ compatible = "loongson,ls2k-pci";
+ reg = <0x0 0x1a000000 0x0 0x02000000>,
+ <0xfe 0x0 0x0 0x20000000>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ bus-range = <0x0 0x5>;
+ ranges = <0x01000000 0x0 0x00004000 0x0 0x16404000 0x0 0x00004000>,
+ <0x02000000 0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>;
+
+ pcie@0,0 {
+ reg = <0x0000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&eiointc>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &eiointc 81>;
+ ranges;
+ };
+
+ pcie@1,0 {
+ reg = <0x0800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&eiointc>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &eiointc 82>;
+ ranges;
+ };
+ };
+ };
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
new file mode 100644
index 000000000000..132a2d1ea8bc
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include "loongson-2k1000.dtsi"
+
+/ {
+ compatible = "loongson,ls2k1000-ref", "loongson,ls2k1000";
+ model = "Loongson-2K1000 Reference Board";
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@200000 {
+ device_type = "memory";
+ reg = <0x0 0x00200000 0x0 0x06e00000>,
+ <0x0 0x08000000 0x0 0x07000000>,
+ <0x0 0x90000000 0x1 0xe0000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+ size = <0x0 0x2000000>;
+ linux,cma-default;
+ };
+ };
+};
+
+&gmac0 {
+ status = "okay";
+
+ phy-mode = "rgmii";
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
+};
+
+&gmac1 {
+ status = "okay";
+
+ phy-mode = "rgmii";
+ phy-handle = <&phy1>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <16>;
+ };
+ };
+};
+
+&i2c2 {
+ status = "okay";
+
+ pinctrl-0 = <&i2c0_pins_default>;
+ pinctrl-names = "default";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ eeprom@57 {
+ compatible = "atmel,24c16";
+ reg = <0x57>;
+ pagesize = <16>;
+ };
+};
+
+&spi0 {
+ status = "okay";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ spidev@0 {
+ compatible = "rohm,dh2228fv";
+ spi-max-frequency = <100000000>;
+ reg = <0>;
+ };
+};
+
+&ehci0 {
+ status = "okay";
+};
+
+&ohci0 {
+ status = "okay";
+};
+
+&sata {
+ status = "okay";
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&clk {
+ status = "okay";
+};
+
+&rtc0 {
+ status = "okay";
+};
+
+&pctrl {
+ status = "okay";
+
+ sdio_pins_default: sdio-pins {
+ sdio-pinmux {
+ groups = "sdio";
+ function = "sdio";
+ };
+ sdio-det-pinmux {
+ groups = "pwm2";
+ function = "gpio";
+ };
+ };
+
+ pwm1_pins_default: pwm1-pins {
+ pinmux {
+ groups = "pwm1";
+ function = "pwm1";
+ };
+ };
+
+ pwm0_pins_default: pwm0-pins {
+ pinmux {
+ groups = "pwm0";
+ function = "pwm0";
+ };
+ };
+
+ i2c1_pins_default: i2c1-pins {
+ pinmux {
+ groups = "i2c1";
+ function = "i2c1";
+ };
+ };
+
+ i2c0_pins_default: i2c0-pins {
+ pinmux {
+ groups = "i2c0";
+ function = "i2c0";
+ };
+ };
+
+ nand_pins_default: nand-pins {
+ pinmux {
+ groups = "nand";
+ function = "nand";
+ };
+ };
+
+ hda_pins_default: hda-pins {
+ grp0-pinmux {
+ groups = "hda";
+ function = "hda";
+ };
+ grp1-pinmux {
+ groups = "i2s";
+ function = "gpio";
+ };
+ };
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
new file mode 100644
index 000000000000..49a70f8c3cab
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/loongson,ls2k-clk.h>
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ compatible = "loongson,la264";
+ device_type = "cpu";
+ reg = <0x0>;
+ clocks = <&clk LOONGSON2_NODE_CLK>;
+ };
+
+ cpu1: cpu@1 {
+ compatible = "loongson,la264";
+ device_type = "cpu";
+ reg = <0x1>;
+ clocks = <&clk LOONGSON2_NODE_CLK>;
+ };
+ };
+
+ ref_100m: clock-ref-100m {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <100000000>;
+ clock-output-names = "ref_100m";
+ };
+
+ cpuintc: interrupt-controller {
+ compatible = "loongson,cpu-interrupt-controller";
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ };
+
+ /* i2c of the dvi eeprom edid */
+ i2c-gpio-0 {
+ compatible = "i2c-gpio";
+ scl-gpios = <&gpio0 0 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&gpio0 1 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ i2c-gpio,delay-us = <5>; /* ~100 kHz */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ /* i2c of the eeprom edid */
+ i2c-gpio-1 {
+ compatible = "i2c-gpio";
+ scl-gpios = <&gpio0 33 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ sda-gpios = <&gpio0 32 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+ i2c-gpio,delay-us = <5>; /* ~100 kHz */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ thermal-zones {
+ cpu-thermal {
+ polling-delay-passive = <1000>;
+ polling-delay = <5000>;
+ thermal-sensors = <&tsensor 0>;
+
+ trips {
+ cpu_alert: cpu-alert {
+ temperature = <33000>;
+ hysteresis = <2000>;
+ type = "active";
+ };
+
+ cpu_crit: cpu-crit {
+ temperature = <85000>;
+ hysteresis = <5000>;
+ type = "critical";
+ };
+ };
+ };
+ };
+
+ bus@10000000 {
+ compatible = "simple-bus";
+ ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>,
+ <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>,
+ <0x0 0x20000000 0x0 0x20000000 0x0 0x10000000>,
+ <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>,
+ <0xfe 0x0 0xfe 0x0 0x0 0x40000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ dma-coherent;
+
+ liointc0: interrupt-controller@1fe01400 {
+ compatible = "loongson,liointc-2.0";
+ reg = <0x0 0x1fe01400 0x0 0x40>,
+ <0x0 0x1fe01040 0x0 0x8>,
+ <0x0 0x1fe01140 0x0 0x8>;
+ reg-names = "main", "isr0", "isr1";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+ interrupt-names = "int0";
+ loongson,parent_int_map = <0xffffffff>, /* int0 */
+ <0x00000000>, /* int1 */
+ <0x00000000>, /* int2 */
+ <0x00000000>; /* int3 */
+ };
+
+ liointc1: interrupt-controller@1fe01440 {
+ compatible = "loongson,liointc-2.0";
+ reg = <0x0 0x1fe01440 0x0 0x40>,
+ <0x0 0x1fe01048 0x0 0x8>,
+ <0x0 0x1fe01148 0x0 0x8>;
+ reg-names = "main", "isr0", "isr1";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <3>;
+ interrupt-names = "int1";
+ loongson,parent_int_map = <0x00000000>, /* int0 */
+ <0xffffffff>, /* int1 */
+ <0x00000000>, /* int2 */
+ <0x00000000>; /* int3 */
+ };
+
+ chipid@1fe00000 {
+ compatible = "loongson,ls2k-chipid";
+ reg = <0x0 0x1fe00000 0x0 0x30>;
+ little-endian;
+ };
+
+ pctrl: pinctrl@1fe00420 {
+ compatible = "loongson,ls2k-pinctrl";
+ reg = <0x0 0x1fe00420 0x0 0x18>;
+ status = "disabled";
+ };
+
+ clk: clock-controller@1fe00480 {
+ compatible = "loongson,ls2k-clk";
+ reg = <0x0 0x1fe00480 0x0 0x58>;
+ #clock-cells = <1>;
+ clocks = <&ref_100m>;
+ clock-names = "ref_100m";
+ status = "disabled";
+ };
+
+ gpio0: gpio@1fe00500 {
+ compatible = "loongson,ls2k-gpio";
+ reg = <0x0 0x1fe00500 0x0 0x38>;
+ ngpios = <64>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pctrl 0x0 0x0 15>,
+ <&pctrl 16 16 15>,
+ <&pctrl 32 32 10>,
+ <&pctrl 44 44 20>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <28 IRQ_TYPE_LEVEL_HIGH>,
+ <29 IRQ_TYPE_LEVEL_HIGH>,
+ <30 IRQ_TYPE_LEVEL_HIGH>,
+ <30 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <26 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <>,
+ <>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>,
+ <27 IRQ_TYPE_LEVEL_HIGH>;
+ };
+
+ tsensor: thermal-sensor@1fe01500 {
+ compatible = "loongson,ls2k1000-thermal";
+ reg = <0x0 0x1fe01500 0x0 0x30>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <7 IRQ_TYPE_LEVEL_HIGH>;
+ #thermal-sensor-cells = <1>;
+ };
+
+ dma-controller@1fe00c00 {
+ compatible = "loongson,ls2k1000-apbdma";
+ reg = <0x0 0x1fe00c00 0x0 0x8>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #dma-cells = <1>;
+ status = "disabled";
+ };
+
+ dma-controller@1fe00c10 {
+ compatible = "loongson,ls2k1000-apbdma";
+ reg = <0x0 0x1fe00c10 0x0 0x8>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <13 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #dma-cells = <1>;
+ status = "disabled";
+ };
+
+ dma-controller@1fe00c20 {
+ compatible = "loongson,ls2k1000-apbdma";
+ reg = <0x0 0x1fe00c20 0x0 0x8>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #dma-cells = <1>;
+ status = "disabled";
+ };
+
+ dma-controller@1fe00c30 {
+ compatible = "loongson,ls2k1000-apbdma";
+ reg = <0x0 0x1fe00c30 0x0 0x8>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <15 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #dma-cells = <1>;
+ status = "disabled";
+ };
+
+ dma-controller@1fe00c40 {
+ compatible = "loongson,ls2k1000-apbdma";
+ reg = <0x0 0x1fe00c40 0x0 0x8>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk LOONGSON2_APB_CLK>;
+ #dma-cells = <1>;
+ status = "disabled";
+ };
+
+ uart0: serial@1fe20000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x1fe20000 0x0 0x10>;
+ clock-frequency = <125000000>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <0x0 IRQ_TYPE_LEVEL_HIGH>;
+ no-loopback-test;
+ status = "disabled";
+ };
+
+ i2c2: i2c@1fe21000 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1fe21000 0x0 0x8>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ i2c3: i2c@1fe21800 {
+ compatible = "loongson,ls2k-i2c";
+ reg = <0x0 0x1fe21800 0x0 0x8>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <23 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ pmc: power-management@1fe27000 {
+ compatible = "loongson,ls2k1000-pmc", "loongson,ls2k0500-pmc", "syscon";
+ reg = <0x0 0x1fe27000 0x0 0x58>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+ loongson,suspend-address = <0x0 0x1c000500>;
+
+ syscon-reboot {
+ compatible = "syscon-reboot";
+ offset = <0x30>;
+ mask = <0x1>;
+ };
+
+ syscon-poweroff {
+ compatible = "syscon-poweroff";
+ regmap = <&pmc>;
+ offset = <0x14>;
+ mask = <0x3c00>;
+ value = <0x3c00>;
+ };
+ };
+
+ rtc0: rtc@1fe27800 {
+ compatible = "loongson,ls2k1000-rtc";
+ reg = <0x0 0x1fe27800 0x0 0x100>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ spi0: spi@1fff0220 {
+ compatible = "loongson,ls2k1000-spi";
+ reg = <0x0 0x1fff0220 0x0 0x10>;
+ clocks = <&clk LOONGSON2_BOOT_CLK>;
+ status = "disabled";
+ };
+
+ pcie@1a000000 {
+ compatible = "loongson,ls2k-pci";
+ reg = <0x0 0x1a000000 0x0 0x02000000>,
+ <0xfe 0x0 0x0 0x20000000>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ bus-range = <0x0 0xff>;
+ ranges = <0x01000000 0x0 0x00008000 0x0 0x18008000 0x0 0x00008000>,
+ <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
+
+ gmac0: ethernet@3,0 {
+ reg = <0x1800 0x0 0x0 0x0 0x0>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+ <13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
+ status = "disabled";
+ };
+
+ gmac1: ethernet@3,1 {
+ reg = <0x1900 0x0 0x0 0x0 0x0>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+ <15 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
+ status = "disabled";
+ };
+
+ ehci0: usb@4,1 {
+ reg = <0x2100 0x0 0x0 0x0 0x0>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <18 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ ohci0: usb@4,2 {
+ reg = <0x2200 0x0 0x0 0x0 0x0>;
+ interrupt-parent = <&liointc1>;
+ interrupts = <19 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ display@6,0 {
+ reg = <0x3000 0x0 0x0 0x0 0x0>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <28 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ hda@7,0 {
+ reg = <0x3800 0x0 0x0 0x0 0x0>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ sata: sata@8,0 {
+ reg = <0x4000 0x0 0x0 0x0 0x0>;
+ interrupt-parent = <&liointc0>;
+ interrupts = <19 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ pcie@9,0 {
+ reg = <0x4800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 0x0 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@a,0 {
+ reg = <0x5000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&liointc1>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 1 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@b,0 {
+ reg = <0x5800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&liointc1>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 2 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@c,0 {
+ reg = <0x6000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&liointc1>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 3 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@d,0 {
+ reg = <0x6800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&liointc1>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 4 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@e,0 {
+ reg = <0x7000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&liointc1>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 5 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+ };
+ };
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
new file mode 100644
index 000000000000..dca91caf895e
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include "loongson-2k2000.dtsi"
+
+/ {
+ compatible = "loongson,ls2k2000-ref", "loongson,ls2k2000";
+ model = "Loongson-2K2000 Reference Board";
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@200000 {
+ device_type = "memory";
+ reg = <0x0 0x00200000 0x0 0x0ee00000>,
+ <0x0 0x90000000 0x0 0x70000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+ size = <0x0 0x2000000>;
+ linux,cma-default;
+ };
+ };
+};
+
+&sata {
+ status = "okay";
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&rtc0 {
+ status = "okay";
+};
+
+&xhci0 {
+ status = "okay";
+};
+
+&xhci1 {
+ status = "okay";
+};
+
+&gmac0 {
+ status = "okay";
+};
+
+&gmac1 {
+ status = "okay";
+};
+
+&gmac2 {
+ status = "okay";
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
new file mode 100644
index 000000000000..a231949b5f55
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ compatible = "loongson,la364";
+ device_type = "cpu";
+ reg = <0x0>;
+ clocks = <&cpu_clk>;
+ };
+
+ cpu1: cpu@1 {
+ compatible = "loongson,la364";
+ device_type = "cpu";
+ reg = <0x1>;
+ clocks = <&cpu_clk>;
+ };
+ };
+
+ cpu_clk: cpu-clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1400000000>;
+ };
+
+ cpuintc: interrupt-controller {
+ compatible = "loongson,cpu-interrupt-controller";
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ };
+
+ bus@10000000 {
+ compatible = "simple-bus";
+ ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>,
+ <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>,
+ <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>,
+ <0xfe 0x0 0xfe 0x0 0x0 0x40000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pmc: power-management@100d0000 {
+ compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
+ reg = <0x0 0x100d0000 0x0 0x58>;
+ interrupt-parent = <&eiointc>;
+ interrupts = <47>;
+ loongson,suspend-address = <0x0 0x1c000500>;
+
+ syscon-reboot {
+ compatible = "syscon-reboot";
+ offset = <0x30>;
+ mask = <0x1>;
+ };
+
+ syscon-poweroff {
+ compatible = "syscon-poweroff";
+ regmap = <&pmc>;
+ offset = <0x14>;
+ mask = <0x3c00>;
+ value = <0x3c00>;
+ };
+ };
+
+ liointc: interrupt-controller@1fe01400 {
+ compatible = "loongson,liointc-1.0";
+ reg = <0x0 0x1fe01400 0x0 0x64>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+ interrupt-names = "int0";
+ loongson,parent_int_map = <0xffffffff>, /* int0 */
+ <0x00000000>, /* int1 */
+ <0x00000000>, /* int2 */
+ <0x00000000>; /* int3 */
+ };
+
+ eiointc: interrupt-controller@1fe01600 {
+ compatible = "loongson,ls2k2000-eiointc";
+ reg = <0x0 0x1fe01600 0x0 0xea00>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpuintc>;
+ interrupts = <3>;
+ };
+
+ pic: interrupt-controller@10000000 {
+ compatible = "loongson,pch-pic-1.0";
+ reg = <0x0 0x10000000 0x0 0x400>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ loongson,pic-base-vec = <0>;
+ interrupt-parent = <&eiointc>;
+ };
+
+ msi: msi-controller@1fe01140 {
+ compatible = "loongson,pch-msi-1.0";
+ reg = <0x0 0x1fe01140 0x0 0x8>;
+ msi-controller;
+ loongson,msi-base-vec = <64>;
+ loongson,msi-num-vecs = <192>;
+ interrupt-parent = <&eiointc>;
+ };
+
+ rtc0: rtc@100d0100 {
+ compatible = "loongson,ls2k2000-rtc", "loongson,ls7a-rtc";
+ reg = <0x0 0x100d0100 0x0 0x100>;
+ interrupt-parent = <&pic>;
+ interrupts = <52 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ uart0: serial@1fe001e0 {
+ compatible = "ns16550a";
+ reg = <0x0 0x1fe001e0 0x0 0x10>;
+ clock-frequency = <100000000>;
+ interrupt-parent = <&liointc>;
+ interrupts = <10 IRQ_TYPE_LEVEL_HIGH>;
+ no-loopback-test;
+ status = "disabled";
+ };
+
+ pcie@1a000000 {
+ compatible = "loongson,ls2k-pci";
+ reg = <0x0 0x1a000000 0x0 0x02000000>,
+ <0xfe 0x0 0x0 0x20000000>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ bus-range = <0x0 0xff>;
+ ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+ <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
+
+ gmac0: ethernet@3,0 {
+ reg = <0x1800 0x0 0x0 0x0 0x0>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ gmac1: ethernet@3,1 {
+ reg = <0x1900 0x0 0x0 0x0 0x0>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ gmac2: ethernet@3,2 {
+ reg = <0x1a00 0x0 0x0 0x0 0x0>;
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ xhci0: usb@4,0 {
+ reg = <0x2000 0x0 0x0 0x0 0x0>;
+ interrupts = <48 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ xhci1: usb@19,0 {
+ reg = <0xc800 0x0 0x0 0x0 0x0>;
+ interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ display@6,1 {
+ reg = <0x3100 0x0 0x0 0x0 0x0>;
+ interrupts = <28 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ hda@7,0 {
+ reg = <0x3800 0x0 0x0 0x0 0x0>;
+ interrupts = <58 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ sata: sata@8,0 {
+ reg = <0x4000 0x0 0x0 0x0 0x0>;
+ interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&pic>;
+ status = "disabled";
+ };
+
+ pcie@9,0 {
+ reg = <0x4800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 32 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@a,0 {
+ reg = <0x5000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 33 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@b,0 {
+ reg = <0x5800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 34 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@c,0 {
+ reg = <0x6000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 35 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@d,0 {
+ reg = <0x6800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 36 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@e,0 {
+ reg = <0x7000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 37 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@f,0 {
+ reg = <0x7800 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 40 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+
+ pcie@10,0 {
+ reg = <0x8000 0x0 0x0 0x0 0x0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ interrupt-parent = <&pic>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+ interrupt-map = <0x0 0x0 0x0 0x0 &pic 30 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+ };
+ };
+ };
+};
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 60e331af9839..f18c2ba871ef 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -6,6 +6,8 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_BPF_SYSCALL=y
CONFIG_BPF_JIT=y
CONFIG_PREEMPT=y
+CONFIG_PREEMPT_DYNAMIC=y
+CONFIG_SCHED_CORE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
@@ -19,6 +21,7 @@ CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CPUSETS=y
@@ -26,6 +29,7 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_CGROUP_BPF=y
+CONFIG_CGROUP_MISC=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
@@ -35,6 +39,8 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
CONFIG_PERF_EVENTS=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
CONFIG_LOONGARCH=y
CONFIG_64BIT=y
CONFIG_MACH_LOONGSON64=y
@@ -44,13 +50,11 @@ CONFIG_DMI=y
CONFIG_EFI=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
-CONFIG_NR_CPUS=64
+CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_CPU_HAS_FPU=y
CONFIG_CPU_HAS_LSX=y
CONFIG_CPU_HAS_LASX=y
-CONFIG_KEXEC=y
-CONFIG_CRASH_DUMP=y
CONFIG_RANDOMIZE_BASE=y
CONFIG_SUSPEND=y
CONFIG_HIBERNATION=y
@@ -62,10 +66,6 @@ CONFIG_ACPI_IPMI=m
CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_PCI_SLOT=y
CONFIG_ACPI_HOTPLUG_MEMORY=y
-CONFIG_EFI_ZBOOT=y
-CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
-CONFIG_EFI_CAPSULE_LOADER=m
-CONFIG_EFI_TEST=m
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=m
CONFIG_JUMP_LABEL=y
@@ -74,10 +74,18 @@ CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
+CONFIG_BLK_DEV_ZONED=y
CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_DEV_THROTTLING_LOW=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_FC_APPID=y
+CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_CGROUP_IOPRIO=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_BSD_DISKLABEL=y
CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CMDLINE_PARTITION=y
CONFIG_IOSCHED_BFQ=y
CONFIG_BFQ_GROUP_IOSCHED=y
CONFIG_BINFMT_MISC=m
@@ -93,6 +101,8 @@ CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA=y
+CONFIG_CMA_SYSFS=y
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -128,6 +138,7 @@ CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
CONFIG_INET6_ESP=m
CONFIG_IPV6_MROUTE=y
+CONFIG_MPTCP=y
CONFIG_NETWORK_PHY_TIMESTAMPING=y
CONFIG_NETFILTER=y
CONFIG_BRIDGE_NETFILTER=m
@@ -352,6 +363,7 @@ CONFIG_PCIEAER=y
CONFIG_PCI_IOV=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_SHPC=y
+CONFIG_PCI_HOST_GENERIC=y
CONFIG_PCCARD=m
CONFIG_YENTA=m
CONFIG_RAPIDIO=y
@@ -365,6 +377,10 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_FW_LOADER_COMPRESS=y
CONFIG_FW_LOADER_COMPRESS_ZSTD=y
+CONFIG_EFI_ZBOOT=y
+CONFIG_EFI_BOOTLOADER_CONTROL=m
+CONFIG_EFI_CAPSULE_LOADER=m
+CONFIG_EFI_TEST=m
CONFIG_MTD=m
CONFIG_MTD_BLOCK=m
CONFIG_MTD_CFI=m
@@ -586,6 +602,7 @@ CONFIG_RTW89_8852AE=m
CONFIG_RTW89_8852CE=m
CONFIG_ZD1211RW=m
CONFIG_USB_NET_RNDIS_WLAN=m
+CONFIG_USB4_NET=m
CONFIG_INPUT_MOUSEDEV=y
CONFIG_INPUT_MOUSEDEV_PSAUX=y
CONFIG_INPUT_EVDEV=y
@@ -691,6 +708,9 @@ CONFIG_SND_HDA_CODEC_SIGMATEL=y
CONFIG_SND_HDA_CODEC_HDMI=y
CONFIG_SND_HDA_CODEC_CONEXANT=y
CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_SOC=m
+CONFIG_SND_SOC_LOONGSON_CARD=m
+CONFIG_SND_VIRTIO=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
CONFIG_HID_A4TECH=m
@@ -738,6 +758,11 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
CONFIG_RTC_DRV_LOONGSON=y
CONFIG_DMADEVICES=y
+CONFIG_LS2X_APB_DMA=y
+CONFIG_UDMABUF=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DMABUF_HEAPS_CMA=y
CONFIG_UIO=m
CONFIG_UIO_PDRV_GENIRQ=m
CONFIG_UIO_DMEM_GENIRQ=m
@@ -778,7 +803,15 @@ CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
CONFIG_DEVFREQ_GOV_PERFORMANCE=y
CONFIG_DEVFREQ_GOV_POWERSAVE=y
CONFIG_DEVFREQ_GOV_USERSPACE=y
+CONFIG_NTB=m
+CONFIG_NTB_MSI=y
+CONFIG_NTB_IDT=m
+CONFIG_NTB_EPF=m
+CONFIG_NTB_SWITCHTEC=m
+CONFIG_NTB_PERF=m
+CONFIG_NTB_TRANSPORT=m
CONFIG_PWM=y
+CONFIG_USB4=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
@@ -797,6 +830,10 @@ CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_F2FS_FS=m
+CONFIG_F2FS_FS_SECURITY=y
+CONFIG_F2FS_CHECK_FS=y
+CONFIG_F2FS_FS_COMPRESSION=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
@@ -883,7 +920,6 @@ CONFIG_KEY_DH_OPERATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_YAMA=y
CONFIG_DEFAULT_SECURITY_DAC=y
@@ -914,6 +950,9 @@ CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_CRC32_LOONGARCH=m
CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_DMA_CMA=y
+CONFIG_DMA_NUMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index c60796869b2b..6d5846dd075c 100644
--- a/arch/loongarch/include/asm/bootinfo.h
+++ b/arch/loongarch/include/asm/bootinfo.h
@@ -24,13 +24,15 @@ struct loongson_board_info {
const char *board_vendor;
};
+#define NR_WORDS DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG)
+
struct loongson_system_configuration {
int nr_cpus;
int nr_nodes;
int boot_cpu_id;
int cores_per_node;
int cores_per_package;
- unsigned long cores_io_master;
+ unsigned long cores_io_master[NR_WORDS];
unsigned long suspend_addr;
const char *cpuname;
};
@@ -42,7 +44,7 @@ extern struct loongson_system_configuration loongson_sysconf;
static inline bool io_master(int cpu)
{
- return test_bit(cpu, &loongson_sysconf.cores_io_master);
+ return test_bit(cpu, loongson_sysconf.cores_io_master);
}
#endif /* _ASM_BOOTINFO_H */
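
The bootinfo.h change above works together with the NR_CPUS=256 bump in the defconfig: once the CPU count can exceed the width of a single unsigned long, the io-master mask must become a word array sized by DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG), and set_bit()/test_bit() are handed the array instead of the address of one long. A minimal standalone sketch of that sizing and bit addressing (plain C; mask_set_bit()/mask_test_bit() are illustrative stand-ins for the kernel helpers, not kernel code):

#include <stdio.h>
#include <limits.h>

#define NR_CPUS		256
#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define NR_WORDS	DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG)

static unsigned long cores_io_master[NR_WORDS];

/* stand-ins for set_bit()/test_bit(), operating on the word array */
static void mask_set_bit(int cpu, unsigned long *mask)
{
	mask[cpu / BITS_PER_LONG] |= 1UL << (cpu % BITS_PER_LONG);
}

static int mask_test_bit(int cpu, const unsigned long *mask)
{
	return !!(mask[cpu / BITS_PER_LONG] & (1UL << (cpu % BITS_PER_LONG)));
}

int main(void)
{
	mask_set_bit(0, cores_io_master);
	mask_set_bit(200, cores_io_master);	/* impossible with a single long */
	printf("words=%zu cpu200=%d\n",
	       (size_t)NR_WORDS, mask_test_bit(200, cores_io_master));
	return 0;
}

With 64-bit longs this reserves four words, so CPU 200 lands in word 3 bit 8 rather than overflowing a single bitmask.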
diff --git a/arch/loongarch/include/asm/crash_core.h b/arch/loongarch/include/asm/crash_core.h
new file mode 100644
index 000000000000..218bdbfa527b
--- /dev/null
+++ b/arch/loongarch/include/asm/crash_core.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LOONGARCH_CRASH_CORE_H
+#define _LOONGARCH_CRASH_CORE_H
+
+#define CRASH_ALIGN SZ_2M
+
+#define CRASH_ADDR_LOW_MAX SZ_4G
+#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM()
+
+extern phys_addr_t memblock_end_of_DRAM(void);
+
+#endif
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 9b16a3b8e706..f16bd42456e4 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -241,8 +241,6 @@ void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs);
do { \
current->thread.vdso = &vdso_info; \
\
- loongarch_set_personality_fcsr(state); \
- \
if (personality(current->personality) != PER_LINUX) \
set_personality(PER_LINUX); \
} while (0)
@@ -259,7 +257,6 @@ do { \
clear_thread_flag(TIF_32BIT_ADDR); \
\
current->thread.vdso = &vdso_info; \
- loongarch_set_personality_fcsr(state); \
\
p = personality(current->personality); \
if (p != PER_LINUX32 && p != PER_LINUX) \
@@ -340,6 +337,4 @@ extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
struct arch_elf_state *state);
-extern void loongarch_set_personality_fcsr(struct arch_elf_state *state);
-
#endif /* _ASM_ELF_H */
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index a11996eb5892..de891c2c83d4 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -63,7 +63,7 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
static __always_inline void
ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip)
{
- regs_set_return_value(&fregs->regs, ip);
+ instruction_pointer_set(&fregs->regs, ip);
}
#define ftrace_regs_get_argument(fregs, n) \
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index 11328700d4fa..2d62f7b0d377 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -45,7 +45,10 @@ struct kvm_vcpu_stat {
u64 signal_exits;
};
+#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0)
+#define KVM_MEM_HUGEPAGE_INCAPABLE (1UL << 1)
struct kvm_arch_memory_slot {
+ unsigned long flags;
};
struct kvm_context {
@@ -92,8 +95,10 @@ enum emulation_result {
};
#define KVM_LARCH_FPU (0x1 << 0)
-#define KVM_LARCH_SWCSR_LATEST (0x1 << 1)
-#define KVM_LARCH_HWCSR_USABLE (0x1 << 2)
+#define KVM_LARCH_LSX (0x1 << 1)
+#define KVM_LARCH_LASX (0x1 << 2)
+#define KVM_LARCH_SWCSR_LATEST (0x1 << 3)
+#define KVM_LARCH_HWCSR_USABLE (0x1 << 4)
struct kvm_vcpu_arch {
/*
@@ -175,6 +180,21 @@ static inline void writel_sw_gcsr(struct loongarch_csrs *csr, int reg, unsigned
csr->csrs[reg] = val;
}
+static inline bool kvm_guest_has_fpu(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[2] & CPUCFG2_FP;
+}
+
+static inline bool kvm_guest_has_lsx(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[2] & CPUCFG2_LSX;
+}
+
+static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch)
+{
+ return arch->cpucfg[2] & CPUCFG2_LASX;
+}
+
/* Debug: dump vcpu state */
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
@@ -183,7 +203,6 @@ void kvm_flush_tlb_all(void);
void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa);
int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
-#define KVM_ARCH_WANT_MMU_NOTIFIER
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 553cfa2b2b1c..0cb4fdb8a9b5 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -55,7 +55,26 @@ void kvm_save_fpu(struct loongarch_fpu *fpu);
void kvm_restore_fpu(struct loongarch_fpu *fpu);
void kvm_restore_fcsr(struct loongarch_fpu *fpu);
-void kvm_acquire_timer(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_CPU_HAS_LSX
+int kvm_own_lsx(struct kvm_vcpu *vcpu);
+void kvm_save_lsx(struct loongarch_fpu *fpu);
+void kvm_restore_lsx(struct loongarch_fpu *fpu);
+#else
+static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { return -EINVAL; }
+static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { }
+static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { }
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+int kvm_own_lasx(struct kvm_vcpu *vcpu);
+void kvm_save_lasx(struct loongarch_fpu *fpu);
+void kvm_restore_lasx(struct loongarch_fpu *fpu);
+#else
+static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { return -EINVAL; }
+static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { }
+static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { }
+#endif
+
void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz);
void kvm_reset_timer(struct kvm_vcpu *vcpu);
void kvm_save_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/loongarch/include/asm/shmparam.h b/arch/loongarch/include/asm/shmparam.h
deleted file mode 100644
index c9554f48d2df..000000000000
--- a/arch/loongarch/include/asm/shmparam.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_SHMPARAM_H
-#define _ASM_SHMPARAM_H
-
-#define __ARCH_FORCE_SHMLBA 1
-
-#define SHMLBA SZ_64K /* attach addr a multiple of this */
-
-#endif /* _ASM_SHMPARAM_H */
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index c6ad2ee6106c..923d0bd38294 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -79,6 +79,7 @@ struct kvm_fpu {
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+#define KVM_LOONGARCH_VCPU_CPUCFG 0
struct kvm_debug_exit_arch {
};
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 8e00a754e548..b6b097bbf866 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -119,7 +119,7 @@ acpi_parse_eio_master(union acpi_subtable_headers *header, const unsigned long e
return -EINVAL;
core = eiointc->node * CORES_PER_EIO_NODE;
- set_bit(core, &(loongson_sysconf.cores_io_master));
+ set_bit(core, loongson_sysconf.cores_io_master);
return 0;
}
diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
index acb5d3385675..000825406c1f 100644
--- a/arch/loongarch/kernel/efi.c
+++ b/arch/loongarch/kernel/efi.c
@@ -140,4 +140,6 @@ void __init efi_init(void)
early_memunmap(tbl, sizeof(*tbl));
}
+
+ efi_esrt_init();
}
diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c
index 183e94fc9c69..0fa81ced28dc 100644
--- a/arch/loongarch/kernel/elf.c
+++ b/arch/loongarch/kernel/elf.c
@@ -23,8 +23,3 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
{
return 0;
}
-
-void loongarch_set_personality_fcsr(struct arch_elf_state *state)
-{
- current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
-}
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c
index 6b3bfb0092e6..2f1f5b08638f 100644
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -5,13 +5,16 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <linux/acpi.h>
+#include <linux/clk.h>
#include <linux/efi.h>
#include <linux/export.h>
#include <linux/memblock.h>
+#include <linux/of_clk.h>
#include <asm/early_ioremap.h>
#include <asm/bootinfo.h>
#include <asm/loongson.h>
#include <asm/setup.h>
+#include <asm/time.h>
u64 efi_system_table;
struct loongson_system_configuration loongson_sysconf;
@@ -36,7 +39,16 @@ void __init init_environ(void)
static int __init init_cpu_fullname(void)
{
- int cpu;
+ struct device_node *root;
+ int cpu, ret;
+ char *model;
+
+ /* Parse the cpuname from the DTS "model" property */
+ root = of_find_node_by_path("/");
+ ret = of_property_read_string(root, "model", (const char **)&model);
+ of_node_put(root);
+ if (ret == 0)
+ loongson_sysconf.cpuname = strsep(&model, " ");
if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) {
for (cpu = 0; cpu < NR_CPUS; cpu++)
@@ -46,6 +58,26 @@ static int __init init_cpu_fullname(void)
}
arch_initcall(init_cpu_fullname);
+static int __init fdt_cpu_clk_init(void)
+{
+ struct clk *clk;
+ struct device_node *np;
+
+ np = of_get_cpu_node(0, NULL);
+ if (!np)
+ return -ENODEV;
+
+ clk = of_clk_get(np, 0);
+ if (IS_ERR(clk))
+ return -ENODEV;
+
+ cpu_clock_freq = clk_get_rate(clk);
+ clk_put(clk);
+
+ return 0;
+}
+late_initcall(fdt_cpu_clk_init);
+
static ssize_t boardinfo_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
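
The strsep() call in init_cpu_fullname() above takes the first whitespace-separated word of the DTS "model" property as the cpuname. A quick standalone illustration with a made-up model string (plain C, not kernel code):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char model[] = "Loongson-2K2000 Reference Board";
	char *p = model;
	const char *cpuname = strsep(&p, " ");	/* "Loongson-2K2000" */

	printf("cpuname=%s rest=%s\n", cpuname, p);
	return 0;
}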
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index d53ab10f4644..4382e36ae3d4 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -349,6 +349,7 @@ SYM_FUNC_START(_restore_lsx_upper)
lsx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lsx_upper)
+EXPORT_SYMBOL(_restore_lsx_upper)
SYM_FUNC_START(_init_lsx_upper)
lsx_init_all_upper t1
@@ -384,6 +385,7 @@ SYM_FUNC_START(_restore_lasx_upper)
lasx_restore_all_upper a0 t0 t1
jr ra
SYM_FUNC_END(_restore_lasx_upper)
+EXPORT_SYMBOL(_restore_lasx_upper)
SYM_FUNC_START(_init_lasx_upper)
lasx_init_all_upper t1
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 0ecab4216392..c4f7de2e2805 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -74,6 +74,11 @@ SYM_CODE_START(kernel_entry) # kernel entry point
la.pcrel t0, fw_arg2
st.d a2, t0, 0
+#ifdef CONFIG_PAGE_SIZE_4KB
+ li.d t0, 0
+ li.d t1, CSR_STFILL
+ csrxchg t0, t1, LOONGARCH_CSR_IMPCTL1
+#endif
/* KSave3 used for percpu base, initialized as 0 */
csrwr zero, PERCPU_BASE_KS
/* GPR21 used for percpu base (runtime), initialized as 0 */
@@ -126,6 +131,11 @@ SYM_CODE_START(smpboot_entry)
JUMP_VIRT_ADDR t0, t1
+#ifdef CONFIG_PAGE_SIZE_4KB
+ li.d t0, 0
+ li.d t1, CSR_STFILL
+ csrxchg t0, t1, LOONGARCH_CSR_IMPCTL1
+#endif
/* Enable PG */
li.w t0, 0xb0 # PLV=0, IE=0, PG=1
csrwr t0, LOONGARCH_CSR_CRMD
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 767d94cce0de..f2ff8b5d591e 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -85,6 +85,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
regs->csr_euen = euen;
lose_fpu(0);
lose_lbt(0);
+ current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
clear_thread_flag(TIF_LSX_CTX_LIVE);
clear_thread_flag(TIF_LASX_CTX_LIVE);
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index d183a745fb85..edf2bba80130 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -252,38 +252,23 @@ static void __init arch_reserve_vmcore(void)
#endif
}
-/* 2MB alignment for crash kernel regions */
-#define CRASH_ALIGN SZ_2M
-#define CRASH_ADDR_MAX SZ_4G
-
-static void __init arch_parse_crashkernel(void)
+static void __init arch_reserve_crashkernel(void)
{
-#ifdef CONFIG_KEXEC
int ret;
- unsigned long long total_mem;
+ unsigned long long low_size = 0;
unsigned long long crash_base, crash_size;
+ char *cmdline = boot_command_line;
+ bool high = false;
- total_mem = memblock_phys_mem_size();
- ret = parse_crashkernel(boot_command_line, total_mem,
- &crash_size, &crash_base,
- NULL, NULL);
- if (ret < 0 || crash_size <= 0)
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;
- if (crash_base <= 0) {
- crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_MAX);
- if (!crash_base) {
- pr_warn("crashkernel reservation failed - No suitable area found.\n");
- return;
- }
- } else if (!memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size)) {
- pr_warn("Invalid memory region reserved for crash kernel\n");
+ ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
+ &crash_size, &crash_base, &low_size, &high);
+ if (ret)
return;
- }
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
-#endif
+ reserve_crashkernel_generic(cmdline, crash_size, crash_base, low_size, high);
}
static void __init fdt_setup(void)
@@ -295,8 +280,12 @@ static void __init fdt_setup(void)
if (acpi_os_get_root_pointer())
return;
- /* Look for a device tree configuration table entry */
- fdt_pointer = efi_fdt_pointer();
+ /* Prefer to use built-in dtb, checking its legality first. */
+ if (!fdt_check_header(__dtb_start))
+ fdt_pointer = __dtb_start;
+ else
+ fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */
+
if (!fdt_pointer || fdt_check_header(fdt_pointer))
return;
@@ -330,7 +319,9 @@ static void __init bootcmdline_init(char **cmdline_p)
if (boot_command_line[0])
strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
- strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE);
+ if (!strstr(boot_command_line, init_command_line))
+ strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE);
+
goto out;
}
#endif
@@ -357,7 +348,7 @@ out:
void __init platform_init(void)
{
arch_reserve_vmcore();
- arch_parse_crashkernel();
+ arch_reserve_crashkernel();
#ifdef CONFIG_ACPI_TABLE_UPGRADE
acpi_table_upgrade();
@@ -467,15 +458,6 @@ static void __init resource_init(void)
request_resource(res, &data_resource);
request_resource(res, &bss_resource);
}
-
-#ifdef CONFIG_KEXEC
- if (crashk_res.start < crashk_res.end) {
- insert_resource(&iomem_resource, &crashk_res);
- pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
- (unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20),
- (unsigned long)(crashk_res.start >> 20));
- }
-#endif
}
static int __init add_legacy_isa_io(struct fwnode_handle *fwnode,
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 5bca12d16e06..2b49d30eb7c0 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -208,7 +208,7 @@ static void __init fdt_smp_setup(void)
}
loongson_sysconf.nr_cpus = num_processors;
- set_bit(0, &(loongson_sysconf.cores_io_master));
+ set_bit(0, loongson_sysconf.cores_io_master);
#endif
}
@@ -216,6 +216,9 @@ void __init loongson_smp_setup(void)
{
fdt_smp_setup();
+ if (loongson_sysconf.cores_per_package == 0)
+ loongson_sysconf.cores_per_package = num_processors;
+
cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
@@ -506,7 +509,6 @@ asmlinkage void start_secondary(void)
sync_counter();
cpu = raw_smp_processor_id();
set_my_cpu_offset(per_cpu_offset(cpu));
- rcutree_report_cpu_starting(cpu);
cpu_probe();
constant_clockevent_init();
diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c
index 3fd166006698..75d5c51a7cd3 100644
--- a/arch/loongarch/kernel/topology.c
+++ b/arch/loongarch/kernel/topology.c
@@ -10,47 +10,9 @@
#include <acpi/processor.h>
-static DEFINE_PER_CPU(struct cpu, cpu_devices);
-
#ifdef CONFIG_HOTPLUG_CPU
-int arch_register_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
{
- int ret;
- struct cpu *c = &per_cpu(cpu_devices, cpu);
-
- c->hotpluggable = 1;
- ret = register_cpu(c, cpu);
- if (ret < 0)
- pr_warn("register_cpu %d failed (%d)\n", cpu, ret);
-
- return ret;
-}
-EXPORT_SYMBOL(arch_register_cpu);
-
-void arch_unregister_cpu(int cpu)
-{
- struct cpu *c = &per_cpu(cpu_devices, cpu);
-
- c->hotpluggable = 0;
- unregister_cpu(c);
+ return !io_master(cpu);
}
-EXPORT_SYMBOL(arch_unregister_cpu);
#endif
-
-static int __init topology_init(void)
-{
- int i, ret;
-
- for_each_present_cpu(i) {
- struct cpu *c = &per_cpu(cpu_devices, i);
-
- c->hotpluggable = !io_master(i);
- ret = register_cpu(c, i);
- if (ret < 0)
- pr_warn("topology_init: register_cpu %d failed (%d)\n", i, ret);
- }
-
- return 0;
-}
-
-subsys_initcall(topology_init);
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index fda425babfb2..61f7e33b1f95 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -22,14 +22,13 @@ config KVM
depends on AS_HAS_LVZ_EXTENSION
depends on HAVE_KVM
select HAVE_KVM_DIRTY_RING_ACQ_REL
- select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
+ select KVM_GENERIC_MMU_NOTIFIER
select KVM_MMIO
select KVM_XFER_TO_GUEST_WORK
- select MMU_NOTIFIER
- select PREEMPT_NOTIFIERS
help
Support hosting virtualized guest machines using
hardware virtualization extensions. You will need
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index ce8de3fa472c..ed1d89d53e2e 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -200,17 +200,8 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu)
++vcpu->stat.idle_exits;
trace_kvm_exit_idle(vcpu, KVM_TRACE_EXIT_IDLE);
- if (!kvm_arch_vcpu_runnable(vcpu)) {
- /*
- * Switch to the software timer before halt-polling/blocking as
- * the guest's timer may be a break event for the vCPU, and the
- * hypervisor timer runs only when the CPU is in guest mode.
- * Switch before halt-polling so that KVM recognizes an expired
- * timer before blocking.
- */
- kvm_save_timer(vcpu);
- kvm_vcpu_block(vcpu);
- }
+ if (!kvm_arch_vcpu_runnable(vcpu))
+ kvm_vcpu_halt(vcpu);
return EMULATE_DONE;
}
@@ -643,6 +634,11 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
+ if (!kvm_guest_has_fpu(&vcpu->arch)) {
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+ return RESUME_GUEST;
+ }
+
/*
* If guest FPU not present, the FPU operation should have been
* treated as a reserved instruction!
@@ -660,6 +656,36 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
}
/*
+ * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use LSX when it is disabled in the root
+ * context.
+ */
+static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu)
+{
+ if (kvm_own_lsx(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+/*
+ * kvm_handle_lasx_disabled() - Guest used LASX while disabled in root.
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use LASX when it is disabled in the root
+ * context.
+ */
+static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
+{
+ if (kvm_own_lasx(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+/*
* LoongArch KVM callback handling for unimplemented guest exiting
*/
static int kvm_fault_ni(struct kvm_vcpu *vcpu)
@@ -687,6 +713,8 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
[EXCCODE_TLBS] = kvm_handle_write_fault,
[EXCCODE_TLBM] = kvm_handle_write_fault,
[EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
+ [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
+ [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
[EXCCODE_GSPR] = kvm_handle_gspr,
};
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
index 1c1d5199500e..86a2f2d0cb27 100644
--- a/arch/loongarch/kvm/main.c
+++ b/arch/loongarch/kvm/main.c
@@ -287,7 +287,6 @@ int kvm_arch_hardware_enable(void)
if (env & CSR_GCFG_MATC_ROOT)
gcfg |= CSR_GCFG_MATC_ROOT;
- gcfg |= CSR_GCFG_TIT;
write_csr_gcfg(gcfg);
kvm_flush_tlb_all();
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
index 80480df5f550..50a6acd7ffe4 100644
--- a/arch/loongarch/kvm/mmu.c
+++ b/arch/loongarch/kvm/mmu.c
@@ -13,6 +13,16 @@
#include <asm/tlb.h>
#include <asm/kvm_mmu.h>
+static inline bool kvm_hugepage_capable(struct kvm_memory_slot *slot)
+{
+ return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE;
+}
+
+static inline bool kvm_hugepage_incapable(struct kvm_memory_slot *slot)
+{
+ return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE;
+}
+
static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx)
{
ctx->level = kvm->arch.root_level;
@@ -365,6 +375,69 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx);
}
+int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new, enum kvm_mr_change change)
+{
+ gpa_t gpa_start;
+ hva_t hva_start;
+ size_t size, gpa_offset, hva_offset;
+
+ if ((change != KVM_MR_MOVE) && (change != KVM_MR_CREATE))
+ return 0;
+ /*
+ * Prevent userspace from creating a memory region outside of the
+ * VM GPA address space
+ */
+ if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT))
+ return -ENOMEM;
+
+ new->arch.flags = 0;
+ size = new->npages * PAGE_SIZE;
+ gpa_start = new->base_gfn << PAGE_SHIFT;
+ hva_start = new->userspace_addr;
+ if (IS_ALIGNED(size, PMD_SIZE) && IS_ALIGNED(gpa_start, PMD_SIZE)
+ && IS_ALIGNED(hva_start, PMD_SIZE))
+ new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE;
+ else {
+ /*
+ * Pages belonging to memslots that don't have the same
+ * alignment within a PMD for userspace and GPA cannot be
+ * mapped with PMD entries, because we'll end up mapping
+ * the wrong pages.
+ *
+ * Consider a layout like the following:
+ *
+ * memslot->userspace_addr:
+ * +-----+--------------------+--------------------+---+
+ * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
+ * +-----+--------------------+--------------------+---+
+ *
+ * memslot->base_gfn << PAGE_SIZE:
+ * +---+--------------------+--------------------+-----+
+ * |abc|def Stage-2 block | Stage-2 block |tvxyz|
+ * +---+--------------------+--------------------+-----+
+ *
+ * If we create those stage-2 blocks, we'll end up with this
+ * incorrect mapping:
+ * d -> f
+ * e -> g
+ * f -> h
+ */
+ gpa_offset = gpa_start & (PMD_SIZE - 1);
+ hva_offset = hva_start & (PMD_SIZE - 1);
+ if (gpa_offset != hva_offset) {
+ new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
+ } else {
+ if (gpa_offset == 0)
+ gpa_offset = PMD_SIZE;
+ if ((size + gpa_offset) < (PMD_SIZE * 2))
+ new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
+ }
+ }
+
+ return 0;
+}
+
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
@@ -562,47 +635,23 @@ out:
}
static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
- unsigned long hva, unsigned long map_size, bool write)
+ unsigned long hva, bool write)
{
- size_t size;
- gpa_t gpa_start;
- hva_t uaddr_start, uaddr_end;
+ hva_t start, end;
/* Disable dirty logging on HugePages */
if (kvm_slot_dirty_track_enabled(memslot) && write)
return false;
- size = memslot->npages * PAGE_SIZE;
- gpa_start = memslot->base_gfn << PAGE_SHIFT;
- uaddr_start = memslot->userspace_addr;
- uaddr_end = uaddr_start + size;
+ if (kvm_hugepage_capable(memslot))
+ return true;
- /*
- * Pages belonging to memslots that don't have the same alignment
- * within a PMD for userspace and GPA cannot be mapped with stage-2
- * PMD entries, because we'll end up mapping the wrong pages.
- *
- * Consider a layout like the following:
- *
- * memslot->userspace_addr:
- * +-----+--------------------+--------------------+---+
- * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
- * +-----+--------------------+--------------------+---+
- *
- * memslot->base_gfn << PAGE_SIZE:
- * +---+--------------------+--------------------+-----+
- * |abc|def Stage-2 block | Stage-2 block |tvxyz|
- * +---+--------------------+--------------------+-----+
- *
- * If we create those stage-2 blocks, we'll end up with this incorrect
- * mapping:
- * d -> f
- * e -> g
- * f -> h
- */
- if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
+ if (kvm_hugepage_incapable(memslot))
return false;
+ start = memslot->userspace_addr;
+ end = start + memslot->npages * PAGE_SIZE;
+
/*
* Next, let's make sure we're not trying to map anything not covered
* by the memslot. This means we have to prohibit block size mappings
@@ -615,8 +664,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
* userspace_addr or the base_gfn, as both are equally aligned (per
* the check above) and equally sized.
*/
- return (hva & ~(map_size - 1)) >= uaddr_start &&
- (hva & ~(map_size - 1)) + map_size <= uaddr_end;
+ return (hva >= ALIGN(start, PMD_SIZE)) && (hva < ALIGN_DOWN(end, PMD_SIZE));
}
/*
@@ -627,7 +675,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
*
* There are several ways to safely use this helper:
*
- * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
+ * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
* consuming it. In this case, mmu_lock doesn't need to be held during the
* lookup, but it does need to be held while checking the MMU notifier.
*
@@ -807,7 +855,7 @@ retry:
/* Check if an invalidation has taken place since we got pfn */
spin_lock(&kvm->mmu_lock);
- if (mmu_invalidate_retry_hva(kvm, mmu_seq, hva)) {
+ if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
/*
* This can happen when mappings are changed asynchronously, but
* also synchronously if a COW is triggered by
@@ -842,7 +890,7 @@ retry:
/* Disable dirty logging on HugePages */
level = 0;
- if (!fault_supports_huge_mapping(memslot, hva, PMD_SIZE, write)) {
+ if (!fault_supports_huge_mapping(memslot, hva, write)) {
level = 0;
} else {
level = host_pfn_mapping_level(kvm, gfn, memslot);
@@ -901,12 +949,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
-int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
- struct kvm_memory_slot *new, enum kvm_mr_change change)
-{
- return 0;
-}
-
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
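
The memslot classification added in kvm_arch_prepare_memory_region() above reduces to one alignment rule: a slot can be backed by PMD (huge) mappings only if its guest-physical and userspace starting addresses share the same offset within a PMD and the slot still covers at least one full PMD. A standalone sketch of that decision, assuming a 2 MiB PMD purely for illustration (the real size depends on the configured page size); classify_slot() is a hypothetical helper, not the kernel function:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PMD_SIZE	(1UL << 21)	/* 2 MiB, illustrative only */

enum slot_hugepage { HUGEPAGE_CAPABLE, HUGEPAGE_INCAPABLE, HUGEPAGE_PER_FAULT };

static enum slot_hugepage classify_slot(uint64_t gpa_start, uint64_t hva_start,
					uint64_t size)
{
	uint64_t gpa_off = gpa_start & (PMD_SIZE - 1);
	uint64_t hva_off = hva_start & (PMD_SIZE - 1);

	/* fully aligned slot: every page may be mapped with PMD entries */
	if (!(size & (PMD_SIZE - 1)) && !gpa_off && !hva_off)
		return HUGEPAGE_CAPABLE;

	/* different offsets within a PMD: huge mappings would map wrong pages */
	if (gpa_off != hva_off)
		return HUGEPAGE_INCAPABLE;

	/* same offset, but the slot must still contain at least one full PMD */
	if (!gpa_off)
		gpa_off = PMD_SIZE;
	if (size + gpa_off < 2 * PMD_SIZE)
		return HUGEPAGE_INCAPABLE;

	/* otherwise decide per fault, inside the PMD-aligned interior */
	return HUGEPAGE_PER_FAULT;
}

int main(void)
{
	printf("%d %d\n",
	       classify_slot(0x200000, 0x7f0000200000, 0x400000),	/* capable */
	       classify_slot(0x201000, 0x7f0000202000, 0x400000));	/* incapable */
	return 0;
}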
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 0ed9040307b7..ba976509bfe8 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -245,6 +245,37 @@ SYM_FUNC_START(kvm_restore_fpu)
jr ra
SYM_FUNC_END(kvm_restore_fpu)
+#ifdef CONFIG_CPU_HAS_LSX
+SYM_FUNC_START(kvm_save_lsx)
+ fpu_save_csr a0 t1
+ fpu_save_cc a0 t1 t2
+ lsx_save_data a0 t1
+ jr ra
+SYM_FUNC_END(kvm_save_lsx)
+
+SYM_FUNC_START(kvm_restore_lsx)
+ lsx_restore_data a0 t1
+ fpu_restore_cc a0 t1 t2
+ fpu_restore_csr a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_lsx)
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+SYM_FUNC_START(kvm_save_lasx)
+ fpu_save_csr a0 t1
+ fpu_save_cc a0 t1 t2
+ lasx_save_data a0 t1
+ jr ra
+SYM_FUNC_END(kvm_save_lasx)
+
+SYM_FUNC_START(kvm_restore_lasx)
+ lasx_restore_data a0 t1
+ fpu_restore_cc a0 t1 t2
+ fpu_restore_csr a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_lasx)
+#endif
.section ".rodata"
SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
index 284bf553fefe..111328f60872 100644
--- a/arch/loongarch/kvm/timer.c
+++ b/arch/loongarch/kvm/timer.c
@@ -65,40 +65,23 @@ void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz)
}
/*
- * Restore hard timer state and enable guest to access timer registers
- * without trap, should be called with irq disabled
- */
-void kvm_acquire_timer(struct kvm_vcpu *vcpu)
-{
- unsigned long cfg;
-
- cfg = read_csr_gcfg();
- if (!(cfg & CSR_GCFG_TIT))
- return;
-
- /* Enable guest access to hard timer */
- write_csr_gcfg(cfg & ~CSR_GCFG_TIT);
-
- /*
- * Freeze the soft-timer and sync the guest stable timer with it. We do
- * this with interrupts disabled to avoid latency.
- */
- hrtimer_cancel(&vcpu->arch.swtimer);
-}
-
-/*
* Restore soft timer state from saved context.
*/
void kvm_restore_timer(struct kvm_vcpu *vcpu)
{
- unsigned long cfg, delta, period;
+ unsigned long cfg, estat;
+ unsigned long ticks, delta, period;
ktime_t expire, now;
struct loongarch_csrs *csr = vcpu->arch.csr;
/*
* Set guest stable timer cfg csr
+ * Disable the timer before restoring the ESTAT CSR register, to avoid
+ * a spurious timer interrupt from the stale timer configuration
*/
cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
+
+ write_gcsr_timercfg(0);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
if (!(cfg & CSR_TCFG_EN)) {
@@ -108,23 +91,55 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
}
/*
+ * Freeze the soft-timer and sync the guest stable timer with it.
+ */
+ hrtimer_cancel(&vcpu->arch.swtimer);
+
+ /*
+ * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
+ * If a one-shot timer has fired, CSR TVAL will be -1. There are two
+ * cases:
+ * 1) the timer fired while exiting to the host
+ * 2) the timer fired while the VM was handling the timer irq, and the
+ * vCPU then exited to the host; the host must not inject the timer
+ * irq again, to avoid a spurious timer interrupt
+ */
+ ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) {
+ /*
+ * Writing 0 to LOONGARCH_CSR_TVAL injects a timer irq
+ * and sets CSR TVAL to -1
+ */
+ write_gcsr_timertick(0);
+
+ /*
+ * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR clears the
+ * timer interrupt while CSR TVAL stays at -1, which avoids a
+ * spurious timer interrupt
+ */
+ if (!(estat & CPU_TIMER))
+ gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
* Set remainder tick value if not expired
*/
+ delta = 0;
now = ktime_get();
expire = vcpu->arch.expire;
if (ktime_before(now, expire))
delta = ktime_to_tick(vcpu, ktime_sub(expire, now));
- else {
- if (cfg & CSR_TCFG_PERIOD) {
- period = cfg & CSR_TCFG_VAL;
- delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
- delta = period - (delta % period);
- } else
- delta = 0;
+ else if (cfg & CSR_TCFG_PERIOD) {
+ period = cfg & CSR_TCFG_VAL;
+ delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
+ delta = period - (delta % period);
+
/*
* Inject timer here though sw timer should inject timer
* interrupt async already, since sw timer may be cancelled
- * during injecting intr async in function kvm_acquire_timer
+ * while injecting the interrupt asynchronously
*/
kvm_queue_irq(vcpu, INT_TI);
}
@@ -139,27 +154,41 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
*/
static void _kvm_save_timer(struct kvm_vcpu *vcpu)
{
- unsigned long ticks, delta;
+ unsigned long ticks, delta, cfg;
ktime_t expire;
struct loongarch_csrs *csr = vcpu->arch.csr;
+ cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
- delta = tick_to_ns(vcpu, ticks);
- expire = ktime_add_ns(ktime_get(), delta);
- vcpu->arch.expire = expire;
- if (ticks) {
+
+ /*
+ * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
+ * If a periodic timer has fired, CSR TVAL is reloaded from CSR TCFG;
+ * if a one-shot timer has fired, CSR TVAL is -1.
+ * A fired one-shot timer is therefore detected by checking whether
+ * TVAL is larger than TCFG
+ */
+ if (ticks < cfg) {
+ delta = tick_to_ns(vcpu, ticks);
+ expire = ktime_add_ns(ktime_get(), delta);
+ vcpu->arch.expire = expire;
+
/*
- * Update hrtimer to use new timeout
* HRTIMER_MODE_PINNED is suggested since vcpu may run in
* the same physical cpu in next time
*/
- hrtimer_cancel(&vcpu->arch.swtimer);
hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
- } else
+ } else if (vcpu->stat.generic.blocking) {
/*
- * Inject timer interrupt so that hall polling can dectect and exit
+ * Inject timer interrupt so that halt polling can detect and exit.
+ * The vCPU is already scheduled out and sleeping on the rcuwait queue,
+ * so it will not poll pending events again. kvm_queue_irq() is not
+ * enough; the hrtimer swtimer must be used here.
*/
- kvm_queue_irq(vcpu, INT_TI);
+ expire = ktime_add_ns(ktime_get(), 10);
+ vcpu->arch.expire = expire;
+ hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
+ }
}
/*
@@ -168,21 +197,15 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
*/
void kvm_save_timer(struct kvm_vcpu *vcpu)
{
- unsigned long cfg;
struct loongarch_csrs *csr = vcpu->arch.csr;
preempt_disable();
- cfg = read_csr_gcfg();
- if (!(cfg & CSR_GCFG_TIT)) {
- /* Disable guest use of hard timer */
- write_csr_gcfg(cfg | CSR_GCFG_TIT);
-
- /* Save hard timer state */
- kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
- kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
- if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
- _kvm_save_timer(vcpu);
- }
+
+ /* Save hard timer state */
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
+ _kvm_save_timer(vcpu);
/* Save timer-related state to vCPU context */
kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h
index a1e35d655418..c2484ad4cffa 100644
--- a/arch/loongarch/kvm/trace.h
+++ b/arch/loongarch/kvm/trace.h
@@ -102,6 +102,8 @@ TRACE_EVENT(kvm_exit_gspr,
#define KVM_TRACE_AUX_DISCARD 4
#define KVM_TRACE_AUX_FPU 1
+#define KVM_TRACE_AUX_LSX 2
+#define KVM_TRACE_AUX_LASX 3
#define kvm_trace_symbol_aux_op \
{ KVM_TRACE_AUX_SAVE, "save" }, \
@@ -111,7 +113,9 @@ TRACE_EVENT(kvm_exit_gspr,
{ KVM_TRACE_AUX_DISCARD, "discard" }
#define kvm_trace_symbol_aux_state \
- { KVM_TRACE_AUX_FPU, "FPU" }
+ { KVM_TRACE_AUX_FPU, "FPU" }, \
+ { KVM_TRACE_AUX_LSX, "LSX" }, \
+ { KVM_TRACE_AUX_LASX, "LASX" }
TRACE_EVENT(kvm_aux,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op,
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 73d0c2b9c1a5..27701991886d 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -95,7 +95,6 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
* check vmid before vcpu enter guest
*/
local_irq_disable();
- kvm_acquire_timer(vcpu);
kvm_deliver_intr(vcpu);
kvm_deliver_exception(vcpu);
/* Make sure the vcpu mode has been written */
@@ -187,8 +186,15 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvm_pending_timer(vcpu) ||
+ int ret;
+
+ /* Protect from TOD sync and vcpu_load/put() */
+ preempt_disable();
+ ret = kvm_pending_timer(vcpu) ||
kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT) & (1 << INT_TI);
+ preempt_enable();
+
+ return ret;
}
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
@@ -244,23 +250,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -EINVAL;
}
-/**
- * kvm_migrate_count() - Migrate timer.
- * @vcpu: Virtual CPU.
- *
- * Migrate hrtimer to the current CPU by cancelling and restarting it
- * if the hrtimer is active.
- *
- * Must be called when the vCPU is migrated to a different CPU, so that
- * the timer can interrupt the guest at the new CPU, and the timer irq can
- * be delivered to the vCPU.
- */
-static void kvm_migrate_count(struct kvm_vcpu *vcpu)
-{
- if (hrtimer_cancel(&vcpu->arch.swtimer))
- hrtimer_restart(&vcpu->arch.swtimer);
-}
-
static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
{
unsigned long gintc;
@@ -309,6 +298,76 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
return ret;
}
+static int _kvm_get_cpucfg(int id, u64 *v)
+{
+ int ret = 0;
+
+ if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
+ return -EINVAL;
+
+ switch (id) {
+ case 2:
+ /* Return the CPUCFG2 features supported by KVM */
+ *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP |
+ CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
+ CPUCFG2_LAM;
+ /*
+ * If LSX is supported by CPU, it is also supported by KVM,
+ * as we implement it.
+ */
+ if (cpu_has_lsx)
+ *v |= CPUCFG2_LSX;
+ /*
+ * If LASX is supported by CPU, it is also supported by KVM,
+ * as we implement it.
+ */
+ if (cpu_has_lasx)
+ *v |= CPUCFG2_LASX;
+
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static int kvm_check_cpucfg(int id, u64 val)
+{
+ u64 mask;
+ int ret = 0;
+
+ if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
+ return -EINVAL;
+
+ if (_kvm_get_cpucfg(id, &mask))
+ return -EINVAL;
+
+ switch (id) {
+ case 2:
+ /* CPUCFG2 features checking */
+ if (val & ~mask)
+ /* The unsupported features should not be set */
+ ret = -EINVAL;
+ else if (!(val & CPUCFG2_LLFTP))
+ /* LLFTP must be set, as the guest must have a constant timer */
+ ret = -EINVAL;
+ else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
+ /* Single and double precision FP must both be set when FP is enabled */
+ ret = -EINVAL;
+ else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
+ /* FP must be set when LSX is enabled */
+ ret = -EINVAL;
+ else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
+ /* LSX must be set when LASX is enabled; FP has already been checked above */
+ ret = -EINVAL;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg, u64 *v)
{
@@ -378,10 +437,10 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
break;
case KVM_REG_LOONGARCH_CPUCFG:
id = KVM_GET_IOC_CPUCFG_IDX(reg->id);
- if (id >= 0 && id < KVM_MAX_CPUCFG_REGS)
- vcpu->arch.cpucfg[id] = (u32)v;
- else
- ret = -EINVAL;
+ ret = kvm_check_cpucfg(id, v);
+ if (ret)
+ break;
+ vcpu->arch.cpucfg[id] = (u32)v;
break;
case KVM_REG_LOONGARCH_KVM:
switch (reg->id) {
@@ -471,10 +530,94 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case 2:
+ return 0;
+ default:
+ return -ENXIO;
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = 0;
+ uint64_t val;
+ uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
+
+ ret = _kvm_get_cpucfg(attr->attr, &val);
+ if (ret)
+ return ret;
+
+ put_user(val, uaddr);
+
+ return ret;
+}
+
+static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ return -ENXIO;
+}
+
+static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
long r;
+ struct kvm_device_attr attr;
void __user *argp = (void __user *)arg;
struct kvm_vcpu *vcpu = filp->private_data;
@@ -514,6 +657,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
+ case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_has_attr(vcpu, &attr);
+ break;
+ }
+ case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_get_attr(vcpu, &attr);
+ break;
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_set_attr(vcpu, &attr);
+ break;
+ }
default:
r = -ENOIOCTLCMD;
break;
@@ -561,12 +725,96 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
preempt_enable();
}
+#ifdef CONFIG_CPU_HAS_LSX
+/* Enable LSX and restore context */
+int kvm_own_lsx(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+
+ /* Enable LSX for guest */
+ set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
+ switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ case KVM_LARCH_FPU:
+ /*
+ * Guest FPU state already loaded,
+ * only restore upper LSX state
+ */
+ _restore_lsx_upper(&vcpu->arch.fpu);
+ break;
+ default:
+ /* Neither FP nor LSX is active,
+ * restore the full LSX state
+ */
+ kvm_restore_lsx(&vcpu->arch.fpu);
+ break;
+ }
+
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
+ preempt_enable();
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+/* Enable LASX and restore context */
+int kvm_own_lasx(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch) || !kvm_guest_has_lasx(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+
+ set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
+ switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
+ case KVM_LARCH_LSX:
+ case KVM_LARCH_LSX | KVM_LARCH_FPU:
+ /* Guest LSX state already loaded, only restore upper LASX state */
+ _restore_lasx_upper(&vcpu->arch.fpu);
+ break;
+ case KVM_LARCH_FPU:
+ /* Guest FP state already loaded, only restore upper LSX & LASX state */
+ _restore_lsx_upper(&vcpu->arch.fpu);
+ _restore_lasx_upper(&vcpu->arch.fpu);
+ break;
+ default:
+ /* Neither FP nor LSX is active, restore the full LASX state */
+ kvm_restore_lasx(&vcpu->arch.fpu);
+ break;
+ }
+
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
+ preempt_enable();
+
+ return 0;
+}
+#endif
+
/* Save context and disable FPU */
void kvm_lose_fpu(struct kvm_vcpu *vcpu)
{
preempt_disable();
- if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
+ kvm_save_lasx(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX);
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LASX);
+
+ /* Disable LASX & LSX & FPU */
+ clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
+ } else if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) {
+ kvm_save_lsx(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU);
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LSX);
+
+ /* Disable LSX & FPU */
+ clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN);
+ } else if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
kvm_save_fpu(&vcpu->arch.fpu);
vcpu->arch.aux_inuse &= ~KVM_LARCH_FPU;
trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU);
@@ -789,17 +1037,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
unsigned long flags;
local_irq_save(flags);
- if (vcpu->arch.last_sched_cpu != cpu) {
- kvm_debug("[%d->%d]KVM vCPU[%d] switch\n",
- vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
- /*
- * Migrate the timer interrupt to the current CPU so that it
- * always interrupts the guest and synchronously triggers a
- * guest timer interrupt.
- */
- kvm_migrate_count(vcpu);
- }
-
/* Restore guest state to registers */
_kvm_vcpu_load(vcpu, cpu);
local_irq_restore(flags);
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index 2c0a411f23aa..0b95d32b30c9 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -284,12 +284,16 @@ static void setup_tlb_handler(int cpu)
set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
- }
+ } else {
+ int vec_sz __maybe_unused;
+ void *addr __maybe_unused;
+ struct page *page __maybe_unused;
+
+ /* Avoid lockdep warning */
+ rcutree_report_cpu_starting(cpu);
+
#ifdef CONFIG_NUMA
- else {
- void *addr;
- struct page *page;
- const int vec_sz = sizeof(exception_handlers);
+ vec_sz = sizeof(exception_handlers);
if (pcpu_handlers[cpu])
return;
@@ -305,8 +309,8 @@ static void setup_tlb_handler(int cpu)
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY);
csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY);
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
- }
#endif
+ }
}
void tlb_init(int cpu)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 4fcd6cd6da23..e73323d759d0 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -201,6 +201,11 @@ bool bpf_jit_supports_kfunc_call(void)
return true;
}
+bool bpf_jit_supports_far_kfunc_call(void)
+{
+ return true;
+}
+
/* initialized on the first pass of build_body() */
static int out_offset = -1;
static int emit_bpf_tail_call(struct jit_ctx *ctx)
@@ -465,7 +470,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
const u8 dst = regmap[insn->dst_reg];
const s16 off = insn->off;
const s32 imm = insn->imm;
- const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
switch (code) {
@@ -923,8 +927,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* dst = imm64 */
case BPF_LD | BPF_IMM | BPF_DW:
+ {
+ const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
+
move_imm(ctx, dst, imm64, is32);
return 1;
+ }
/* dst = *(size *)(src + off) */
case BPF_LDX | BPF_MEM | BPF_B:
diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c
index 3a74d493eb3e..17b2987c2bf5 100644
--- a/arch/m68k/emu/nfcon.c
+++ b/arch/m68k/emu/nfcon.c
@@ -23,9 +23,9 @@ static int stderr_id;
static struct tty_port nfcon_tty_port;
static struct tty_driver *nfcon_tty_driver;
-static void nfputs(const char *str, unsigned int count)
+static void nfputs(const u8 *str, size_t count)
{
- char buf[68];
+ u8 buf[68];
unsigned long phys = virt_to_phys(buf);
buf[64] = 0;
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index ed12358c4783..9a71b0148461 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -191,6 +191,7 @@ extern void cache_push_v(unsigned long vaddr, int len);
#define flush_cache_all() __flush_cache_all()
#define flush_cache_vmap(start, end) flush_cache_all()
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) flush_cache_all()
static inline void flush_cache_mm(struct mm_struct *mm)
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 7b2d7f6f23c0..4da7bc4ac4a3 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -3,11 +3,9 @@ CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_EXPERT=y
# CONFIG_BASE_FULL is not set
CONFIG_KALLSYMS_ALL=y
-CONFIG_EXPERT=y
CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1
CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1
CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1
@@ -20,7 +18,6 @@ CONFIG_CMDLINE_FORCE=y
CONFIG_HIGHMEM=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_EFI_PARTITION is not set
CONFIG_CMA=y
@@ -28,6 +25,10 @@ CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
# CONFIG_IPV6 is not set
CONFIG_BRIDGE=m
CONFIG_PCI=y
@@ -43,6 +44,7 @@ CONFIG_NETDEVICES=y
CONFIG_XILINX_EMACLITE=y
CONFIG_XILINX_AXI_EMAC=y
CONFIG_XILINX_LL_TEMAC=y
+CONFIG_MARVELL_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -77,14 +79,13 @@ CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
CONFIG_CIFS=y
-CONFIG_CIFS_STATS2=y
CONFIG_ENCRYPTED_KEYS=y
CONFIG_DMA_CMA=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_KGDB=y
CONFIG_KGDB_TESTS=y
CONFIG_KGDB_KDB=y
-CONFIG_DEBUG_SLAB=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_SPINLOCK=y
diff --git a/arch/mips/alchemy/common/prom.c b/arch/mips/alchemy/common/prom.c
index b13d8adf3be4..20d30f6265cd 100644
--- a/arch/mips/alchemy/common/prom.c
+++ b/arch/mips/alchemy/common/prom.c
@@ -40,6 +40,7 @@
#include <linux/string.h>
#include <asm/bootinfo.h>
+#include <prom.h>
int prom_argc;
char **prom_argv;
diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 2388d68786f4..a7a6d31a7a41 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -30,13 +30,11 @@
#include <linux/mm.h>
#include <linux/dma-map-ops.h> /* for dma_default_coherent */
+#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <au1000.h>
-extern void __init board_setup(void);
-extern void __init alchemy_set_lpj(void);
-
static bool alchemy_dma_coherent(void)
{
switch (alchemy_get_cputype()) {
diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c
index f521874ebb07..67f067706af2 100644
--- a/arch/mips/alchemy/devboards/db1200.c
+++ b/arch/mips/alchemy/devboards/db1200.c
@@ -847,7 +847,7 @@ int __init db1200_dev_setup(void)
i2c_register_board_info(0, db1200_i2c_devs,
ARRAY_SIZE(db1200_i2c_devs));
spi_register_board_info(db1200_spi_devs,
- ARRAY_SIZE(db1200_i2c_devs));
+ ARRAY_SIZE(db1200_spi_devs));
/* SWITCHES: S6.8 I2C/SPI selector (OFF=I2C ON=SPI)
* S6.7 AC97/I2S selector (OFF=AC97 ON=I2S)
diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index fd91d9c9a252..6c6837181f55 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c
@@ -589,7 +589,7 @@ int __init db1550_dev_setup(void)
i2c_register_board_info(0, db1550_i2c_devs,
ARRAY_SIZE(db1550_i2c_devs));
spi_register_board_info(db1550_spi_devs,
- ARRAY_SIZE(db1550_i2c_devs));
+ ARRAY_SIZE(db1550_spi_devs));
c = clk_get(NULL, "psc0_intclk");
if (!IS_ERR(c)) {
diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c
index 437a737c01dd..46994f9bb821 100644
--- a/arch/mips/bcm47xx/buttons.c
+++ b/arch/mips/bcm47xx/buttons.c
@@ -147,21 +147,21 @@ static const struct gpio_keys_button
bcm47xx_buttons_buffalo_whr_g125[] __initconst = {
BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
BCM47XX_GPIO_KEY(4, KEY_RESTART),
- BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode swtich */
+ BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode switch */
};
static const struct gpio_keys_button
bcm47xx_buttons_buffalo_whr_g54s[] __initconst = {
BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
BCM47XX_GPIO_KEY_H(4, KEY_RESTART),
- BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode swtich */
+ BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode switch */
};
static const struct gpio_keys_button
bcm47xx_buttons_buffalo_whr_hp_g54[] __initconst = {
BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
BCM47XX_GPIO_KEY(4, KEY_RESTART),
- BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode swtich */
+ BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode switch */
};
static const struct gpio_keys_button
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 01aff80a5967..99f321b6e417 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -702,7 +702,7 @@ static struct ssb_sprom bcm63xx_sprom = {
.boardflags_hi = 0x0000,
};
-int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+static int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
{
if (bus->bustype == SSB_BUSTYPE_PCI) {
memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom));
diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c
index 86a6e2590866..3144965fb7dc 100644
--- a/arch/mips/bcm63xx/clk.c
+++ b/arch/mips/bcm63xx/clk.c
@@ -174,7 +174,7 @@ static void enetsw_set(struct clk *clk, int enable)
}
if (enable) {
- /* reset switch core afer clock change */
+ /* reset switch core after clock change */
bcm63xx_core_set_reset(BCM63XX_RESET_ENETSW, 1);
msleep(10);
bcm63xx_core_set_reset(BCM63XX_RESET_ENETSW, 0);
@@ -304,7 +304,7 @@ static void xtm_set(struct clk *clk, int enable)
bcm_hwclock_set(CKCTL_6368_SAR_EN, enable);
if (enable) {
- /* reset sar core afer clock change */
+ /* reset sar core after clock change */
bcm63xx_core_set_reset(BCM63XX_RESET_SAR, 1);
mdelay(1);
bcm63xx_core_set_reset(BCM63XX_RESET_SAR, 0);
diff --git a/arch/mips/bcm63xx/dev-rng.c b/arch/mips/bcm63xx/dev-rng.c
index d277b4dc6c68..f94151f7c96f 100644
--- a/arch/mips/bcm63xx/dev-rng.c
+++ b/arch/mips/bcm63xx/dev-rng.c
@@ -26,7 +26,7 @@ static struct platform_device bcm63xx_rng_device = {
.resource = rng_resources,
};
-int __init bcm63xx_rng_register(void)
+static int __init bcm63xx_rng_register(void)
{
if (!BCMCPU_IS_6368())
return -ENODEV;
diff --git a/arch/mips/bcm63xx/dev-uart.c b/arch/mips/bcm63xx/dev-uart.c
index 3bc7f3bfc9ad..5d6bf0445b29 100644
--- a/arch/mips/bcm63xx/dev-uart.c
+++ b/arch/mips/bcm63xx/dev-uart.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <bcm63xx_cpu.h>
+#include <bcm63xx_dev_uart.h>
static struct resource uart0_resources[] = {
{
diff --git a/arch/mips/bcm63xx/dev-wdt.c b/arch/mips/bcm63xx/dev-wdt.c
index 42130914a3c2..302bf7ed5ad5 100644
--- a/arch/mips/bcm63xx/dev-wdt.c
+++ b/arch/mips/bcm63xx/dev-wdt.c
@@ -34,7 +34,7 @@ static struct platform_device bcm63xx_wdt_device = {
},
};
-int __init bcm63xx_wdt_register(void)
+static int __init bcm63xx_wdt_register(void)
{
wdt_resources[0].start = bcm63xx_regset_address(RSET_WDT);
wdt_resources[0].end = wdt_resources[0].start;
diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c
index 2548013442f6..6240a8f88ea3 100644
--- a/arch/mips/bcm63xx/irq.c
+++ b/arch/mips/bcm63xx/irq.c
@@ -72,7 +72,7 @@ static inline int enable_irq_for_cpu(int cpu, struct irq_data *d,
*/
#define BUILD_IPIC_INTERNAL(width) \
-void __dispatch_internal_##width(int cpu) \
+static void __dispatch_internal_##width(int cpu) \
{ \
u32 pending[width / 32]; \
unsigned int src, tgt; \
diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c
index d811e3e03f81..c13ddb544a23 100644
--- a/arch/mips/bcm63xx/setup.c
+++ b/arch/mips/bcm63xx/setup.c
@@ -159,7 +159,7 @@ void __init plat_mem_setup(void)
board_setup();
}
-int __init bcm63xx_register_devices(void)
+static int __init bcm63xx_register_devices(void)
{
/* register gpiochip */
bcm63xx_gpio_init();
diff --git a/arch/mips/bcm63xx/timer.c b/arch/mips/bcm63xx/timer.c
index a86065854c0c..74b83807df30 100644
--- a/arch/mips/bcm63xx/timer.c
+++ b/arch/mips/bcm63xx/timer.c
@@ -178,7 +178,7 @@ int bcm63xx_timer_set(int id, int monotonic, unsigned int countdown_us)
EXPORT_SYMBOL(bcm63xx_timer_set);
-int bcm63xx_timer_init(void)
+static int bcm63xx_timer_init(void)
{
int ret, irq;
u32 reg;
diff --git a/arch/mips/boot/compressed/dbg.c b/arch/mips/boot/compressed/dbg.c
index 2f1ac38fe1cc..95405292accd 100644
--- a/arch/mips/boot/compressed/dbg.c
+++ b/arch/mips/boot/compressed/dbg.c
@@ -3,7 +3,7 @@
* MIPS-specific debug support for pre-boot environment
*
* NOTE: putc() is board specific, if your board have a 16550 compatible uart,
- * please select SYS_SUPPORTS_ZBOOT_UART16550 for your machine. othewise, you
+ * please select SYS_SUPPORTS_ZBOOT_UART16550 for your machine. otherwise, you
* need to implement your own putc().
*/
#include <linux/compiler.h>
diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
index 5795d0af1e1b..d237a834b85e 100644
--- a/arch/mips/boot/compressed/head.S
+++ b/arch/mips/boot/compressed/head.S
@@ -25,8 +25,8 @@
/* Clear BSS */
PTR_LA a0, _edata
PTR_LA a2, _end
-1: sw zero, 0(a0)
- addiu a0, a0, 4
+1: PTR_S zero, 0(a0)
+ PTR_ADDIU a0, a0, PTRSIZE
bne a2, a0, 1b
PTR_LA a0, (.heap) /* heap address */
diff --git a/arch/mips/boot/elf2ecoff.c b/arch/mips/boot/elf2ecoff.c
index 6972b97235da..549c5d6ef6d7 100644
--- a/arch/mips/boot/elf2ecoff.c
+++ b/arch/mips/boot/elf2ecoff.c
@@ -443,7 +443,7 @@ int main(int argc, char *argv[])
efh.f_symptr = 0;
efh.f_nsyms = 0;
efh.f_opthdr = sizeof eah;
- efh.f_flags = 0x100f; /* Stripped, not sharable. */
+ efh.f_flags = 0x100f; /* Stripped, not shareable. */
memset(esecs, 0, sizeof esecs);
strcpy(esecs[0].s_name, ".text");
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 124817609ce0..af62a210a40b 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -113,7 +113,7 @@ static struct clocksource clocksource_mips = {
unsigned long long notrace sched_clock(void)
{
- /* 64-bit arithmatic can overflow, so use 128-bit. */
+ /* 64-bit arithmetic can overflow, so use 128-bit. */
u64 t1, t2, t3;
unsigned long long rv;
u64 mult = clocksource_mips.mult;
diff --git a/arch/mips/cavium-octeon/executive/cvmx-boot-vector.c b/arch/mips/cavium-octeon/executive/cvmx-boot-vector.c
index b7019d21808e..76446db66def 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-boot-vector.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-boot-vector.c
@@ -143,7 +143,7 @@ static void cvmx_boot_vector_init(void *mem)
uint64_t v = _cvmx_bootvector_data[i];
if (OCTEON_IS_OCTEON1PLUS() && (i == 0 || i == 7))
- v &= 0xffffffff00000000ull; /* KScratch not availble. */
+ v &= 0xffffffff00000000ull; /* KScratch not available */
cvmx_write_csr(CVMX_MIO_BOOT_LOC_ADR, i * 8);
cvmx_write_csr(CVMX_MIO_BOOT_LOC_DAT, v);
}
diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
index 334bf8e577e5..628ebdf4b9c5 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
@@ -264,7 +264,7 @@ int64_t cvmx_bootmem_phy_alloc(uint64_t req_size, uint64_t address_min,
* Convert !0 address_min and 0 address_max to special case of
* range that specifies an exact memory block to allocate. Do
* this before other checks and adjustments so that this
- * tranformation will be validated.
+ * transformation will be validated.
*/
if (address_min && !address_max)
address_max = address_min + req_size;
diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
index aa7bbf8d0df5..042a6bc44b5c 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
@@ -192,7 +192,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id,
}
/*
- * Shutdown a queue a free it's command buffers to the FPA. The
+ * Shutdown a queue and free its command buffers to the FPA. The
* hardware connected to the queue must be stopped before this
* function is called.
*
@@ -285,7 +285,7 @@ int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id)
/*
* Return the command buffer to be written to. The purpose of this
- * function is to allow CVMX routine access t othe low level buffer
+ * function is to allow CVMX routine access to the low level buffer
* for initial hardware setup. User applications should not call this
* function directly.
*
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c b/arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c
index 607b4e659579..1fceb7fd2c94 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c
@@ -103,7 +103,7 @@ uint32_t cvmx_helper_qlm_jtag_shift(int qlm, int bits, uint32_t data)
/**
* Shift long sequences of zeros into the QLM JTAG chain. It is
* common to need to shift more than 32 bits of zeros into the
- * chain. This function is a convience wrapper around
+ * chain. This function is a convenience wrapper around
* cvmx_helper_qlm_jtag_shift() to shift more than 32 bits of
* zeros at a time.
*
diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c
index 15faca494c80..6e70b859a0ac 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-pko.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c
@@ -615,7 +615,7 @@ int cvmx_pko_rate_limit_bits(int port, uint64_t bits_s, int burst)
/*
* Each packet has a 12 bytes of interframe gap, an 8 byte
* preamble, and a 4 byte CRC. These are not included in the
- * per word count. Multiply by 8 to covert to bits and divide
+ * per word count. Multiply by 8 to convert to bits and divide
* by 256 for limit granularity.
*/
pko_mem_port_rate0.s.rate_pkt = (12 + 8 + 4) * 8 * tokens_per_bit / 256;
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index f76783c24338..5e1dd4e6e82f 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -973,7 +973,7 @@ int __init octeon_prune_device_tree(void)
* zero.
*/
- /* Asume that CS1 immediately follows. */
+ /* Assume that CS1 immediately follows. */
mio_boot_reg_cfg.u64 =
cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(cs + 1));
region1_base = mio_boot_reg_cfg.s.base << 16;
diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c
index 2e099d55a564..9a266bf78339 100644
--- a/arch/mips/cobalt/setup.c
+++ b/arch/mips/cobalt/setup.c
@@ -23,9 +23,6 @@
#include <cobalt.h>
-extern void cobalt_machine_restart(char *command);
-extern void cobalt_machine_halt(void);
-
const char *get_system_type(void)
{
switch (cobalt_board_id) {
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index b51f738a39a0..4714074c8bd7 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -287,7 +287,8 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_FSCACHE_STATS=y
CONFIG_CACHEFILES=m
CONFIG_PROC_KCORE=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 38f17b658421..3389e6e885d9 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -238,7 +238,8 @@ CONFIG_BTRFS_FS=m
CONFIG_QUOTA=y
CONFIG_QFMT_V2=m
CONFIG_AUTOFS_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 07839a4b397e..78f498752066 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -356,7 +356,8 @@ CONFIG_QFMT_V2=m
CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_VIRTIO_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_MSDOS_FS=m
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
index 166d2ad372d1..54774f90c23e 100644
--- a/arch/mips/configs/pic32mzda_defconfig
+++ b/arch/mips/configs/pic32mzda_defconfig
@@ -68,7 +68,8 @@ CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c
index 66188739f54d..fb78e6fd5de4 100644
--- a/arch/mips/fw/arc/memory.c
+++ b/arch/mips/fw/arc/memory.c
@@ -37,7 +37,7 @@ static unsigned int nr_prom_mem __initdata;
*/
#define ARC_PAGE_SHIFT 12
-struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current)
+static struct linux_mdesc * __init ArcGetMemoryDescriptor(struct linux_mdesc *Current)
{
return (struct linux_mdesc *) ARC_CALL1(get_mdesc, Current);
}
diff --git a/arch/mips/fw/arc/promlib.c b/arch/mips/fw/arc/promlib.c
index 5e9e840a9314..93e1e70393ee 100644
--- a/arch/mips/fw/arc/promlib.c
+++ b/arch/mips/fw/arc/promlib.c
@@ -15,11 +15,11 @@
/*
* For 64bit kernels working with a 32bit ARC PROM pointer arguments
* for ARC calls need to reside in CKEG0/1. But as soon as the kernel
- * switches to it's first kernel thread stack is set to an address in
+ * switches to its first kernel thread, the stack is set to an address in
* XKPHYS, so anything on stack can't be used anymore. This is solved
- * by using a * static declartion variables are put into BSS, which is
+ * by using a static declaration: variables are put into BSS, which is
* linked to a CKSEG0 address. Since this is only used on UP platforms
- * there is not spinlock needed
+ * there is no spinlock needed
*/
#define O32_STATIC static
#else
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index f36c2519ed97..1f14132b3fc9 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -97,6 +97,8 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end)
__flush_cache_vmap();
}
+#define flush_cache_vmap_early(start, end) do { } while (0)
+
extern void (*__flush_cache_vunmap)(void);
static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
diff --git a/arch/mips/include/asm/debug.h b/arch/mips/include/asm/debug.h
index c7013e1cb53f..e70392429246 100644
--- a/arch/mips/include/asm/debug.h
+++ b/arch/mips/include/asm/debug.h
@@ -10,7 +10,7 @@
/*
* mips_debugfs_dir corresponds to the "mips" directory at the top level
- * of the DebugFS hierarchy. MIPS-specific DebugFS entires should be
+ * of the DebugFS hierarchy. MIPS-specific DebugFS entries should be
* placed beneath this directory.
*/
extern struct dentry *mips_debugfs_dir;
diff --git a/arch/mips/include/asm/dmi.h b/arch/mips/include/asm/dmi.h
index 27415a288adf..dc397f630c66 100644
--- a/arch/mips/include/asm/dmi.h
+++ b/arch/mips/include/asm/dmi.h
@@ -5,7 +5,7 @@
#include <linux/io.h>
#include <linux/memblock.h>
-#define dmi_early_remap(x, l) ioremap_cache(x, l)
+#define dmi_early_remap(x, l) ioremap(x, l)
#define dmi_early_unmap(x, l) iounmap(x)
#define dmi_remap(x, l) ioremap_cache(x, l)
#define dmi_unmap(x) iounmap(x)
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 85bbd967e05f..af58d6ae06b8 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -159,7 +159,7 @@ void iounmap(const volatile void __iomem *addr);
* address is not guaranteed to be usable directly as a virtual
* address.
*
- * This version of ioremap ensures that the memory is marked cachable by
+ * This version of ioremap ensures that the memory is marked cacheable by
* the CPU. Also enables full write-combining. Useful for some
* memory-like regions on I/O busses.
*/
@@ -177,7 +177,7 @@ void iounmap(const volatile void __iomem *addr);
* address is not guaranteed to be usable directly as a virtual
* address.
*
- * This version of ioremap ensures that the memory is marked uncachable
+ * This version of ioremap ensures that the memory is marked uncacheable
* but accelerated by means of write-combining feature. It is specifically
* useful for PCIe prefetchable windows, which may vastly improve a
* communications performance. If it was determined on boot stage, what
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 54a85f1d4f2c..179f320cc231 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -810,8 +810,6 @@ int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn);
pgd_t *kvm_pgd_alloc(void);
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-
/* Emulation */
enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause);
int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out);
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index a7eec3364a64..41546777902b 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -597,6 +597,9 @@
#include <asm/cpu.h>
+void alchemy_set_lpj(void);
+void board_setup(void);
+
/* helpers to access the SYS_* registers */
static inline unsigned long alchemy_rdsys(int regofs)
{
diff --git a/arch/mips/include/asm/mach-au1x00/au1000_dma.h b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
index 0a0cd4270c6f..b82e513c8523 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000_dma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
@@ -259,7 +259,7 @@ static inline void set_dma_mode(unsigned int dmanr, unsigned int mode)
if (!chan)
return;
/*
- * set_dma_mode is only allowed to change endianess, direction,
+ * set_dma_mode is only allowed to change endianness, direction,
* transfer size, device FIFO width, and coherency settings.
* Make sure anything else is masked off.
*/
diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
index 82bc2766e2ec..d820b481ac56 100644
--- a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
@@ -435,7 +435,7 @@ static inline void alchemy_gpio2_disable_int(int gpio2)
/**
* alchemy_gpio2_enable - Activate GPIO2 block.
*
- * The GPIO2 block must be enabled excplicitly to work. On systems
+ * The GPIO2 block must be enabled explicitly to work. On systems
* where this isn't done by the bootloader, this macro can be used.
*/
static inline void alchemy_gpio2_enable(void)
diff --git a/arch/mips/include/asm/mach-cobalt/cobalt.h b/arch/mips/include/asm/mach-cobalt/cobalt.h
index 5b9fce73f11d..97f9d5e9446d 100644
--- a/arch/mips/include/asm/mach-cobalt/cobalt.h
+++ b/arch/mips/include/asm/mach-cobalt/cobalt.h
@@ -19,4 +19,7 @@ extern int cobalt_board_id;
#define COBALT_BRD_ID_QUBE2 0x5
#define COBALT_BRD_ID_RAQ2 0x6
+void cobalt_machine_halt(void);
+void cobalt_machine_restart(char *command);
+
#endif /* __ASM_COBALT_H */
diff --git a/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h
index 5855ba1bd1ec..40eaa72e54d0 100644
--- a/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h
+++ b/arch/mips/include/asm/mach-lantiq/falcon/lantiq_soc.h
@@ -55,7 +55,7 @@ extern __iomem void *ltq_sys1_membase;
#define ltq_sys1_w32_mask(clear, set, reg) \
ltq_sys1_w32((ltq_sys1_r32(reg) & ~(clear)) | (set), reg)
-/* allow the gpio and pinctrl drivers to talk to eachother */
+/* allow the gpio and pinctrl drivers to talk to each other */
extern int pinctrl_falcon_get_range_size(int id);
extern void pinctrl_falcon_add_gpio_range(struct pinctrl_gpio_range *range);
diff --git a/arch/mips/include/asm/mach-loongson64/loongson_hwmon.h b/arch/mips/include/asm/mach-loongson64/loongson_hwmon.h
index 545f91f2ae16..721eafc4644e 100644
--- a/arch/mips/include/asm/mach-loongson64/loongson_hwmon.h
+++ b/arch/mips/include/asm/mach-loongson64/loongson_hwmon.h
@@ -42,7 +42,7 @@ struct loongson_fan_policy {
/* period between two check. (Unit: S) */
u8 adjust_period;
- /* fan adjust usually depend on a temprature input */
+ /* fan adjust usually depend on a temperature input */
get_temp_fun depend_temp;
/* up_step/down_step used when type is STEP_SPEED_POLICY */
diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
index b5be7511f6cd..fec767507604 100644
--- a/arch/mips/include/asm/mach-loongson64/loongson_regs.h
+++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h
@@ -227,7 +227,7 @@ static inline void csr_writeq(u64 val, u32 reg)
#define LOONGSON_CSR_NODECNT 0x408
#define LOONGSON_CSR_CPUTEMP 0x428
-/* PerCore CSR, only accessable by local cores */
+/* PerCore CSR, only accessible by local cores */
#define LOONGSON_CSR_IPI_STATUS 0x1000
#define LOONGSON_CSR_IPI_EN 0x1004
#define LOONGSON_CSR_IPI_SET 0x1008
diff --git a/arch/mips/include/asm/mach-malta/spaces.h b/arch/mips/include/asm/mach-malta/spaces.h
index d7e54971ec66..1ce4ba97852f 100644
--- a/arch/mips/include/asm/mach-malta/spaces.h
+++ b/arch/mips/include/asm/mach-malta/spaces.h
@@ -23,13 +23,13 @@
* The kernel is still located in 0x80000000(kseg0). However,
* the physical mask has been shifted to 0x80000000 which exploits the alias
* on the Malta board. As a result of which, we override the __pa_symbol
- * to peform direct mapping from virtual to physical addresses. In other
+ * to perform direct mapping from virtual to physical addresses. In other
* words, the 0x80000000 virtual address maps to 0x80000000 physical address
* which in turn aliases to 0x0. We do this in order to be able to use a flat
* 2GB of memory (0x80000000 - 0xffffffff) so we can avoid the I/O hole in
* 0x10000000 - 0x1fffffff.
* The last 64KB of physical memory are reserved for correct HIGHMEM
- * macros arithmetics.
+ * macros arithmetic.
*
*/
diff --git a/arch/mips/include/asm/mips-boards/bonito64.h b/arch/mips/include/asm/mips-boards/bonito64.h
index 5368891d424b..31a31fe78d77 100644
--- a/arch/mips/include/asm/mips-boards/bonito64.h
+++ b/arch/mips/include/asm/mips-boards/bonito64.h
@@ -16,7 +16,7 @@
*/
/* Revision 1.48 autogenerated on 08/17/99 15:20:01 */
-/* This bonito64 version editted from bonito.h Revision 1.48 on 11/09/00 */
+/* This bonito64 version edited from bonito.h Revision 1.48 on 11/09/00 */
#ifndef _ASM_MIPS_BOARDS_BONITO64_H
#define _ASM_MIPS_BOARDS_BONITO64_H
diff --git a/arch/mips/include/asm/mips-cpc.h b/arch/mips/include/asm/mips-cpc.h
index b54453f1648c..5f3a7a9f42bf 100644
--- a/arch/mips/include/asm/mips-cpc.h
+++ b/arch/mips/include/asm/mips-cpc.h
@@ -22,7 +22,7 @@ extern void __iomem *mips_cpc_base;
* the CPC
*
* Returns the default physical base address of the Cluster Power Controller
- * memory mapped registers. This is platform dependant & must therefore be
+ * memory mapped registers. This is platform dependent & must therefore be
* implemented per-platform.
*/
extern phys_addr_t mips_cpc_default_phys_base(void);
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 2d53704d9f24..ec58cb76d076 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -98,7 +98,7 @@
/*
* R4640/R4650 cp0 register names. These registers are listed
- * here only for completeness; without MMU these CPUs are not useable
+ * here only for completeness; without MMU these CPUs are not usable
* by Linux. A future ELKS port might make Linux run on them
* though ...
*/
@@ -461,7 +461,7 @@
#define EXCCODE_THREAD 25 /* Thread exceptions (MT) */
#define EXCCODE_DSPDIS 26 /* DSP disabled exception */
#define EXCCODE_GE 27 /* Virtualized guest exception (VZ) */
-#define EXCCODE_CACHEERR 30 /* Parity/ECC occured on a core */
+#define EXCCODE_CACHEERR 30 /* Parity/ECC occurred on a core */
/* Implementation specific trap codes used by MIPS cores */
#define MIPS_EXCCODE_TLBPAR 16 /* TLB parity error exception */
diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
index c1c0b3230e0a..028bf1d6daee 100644
--- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h
+++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h
@@ -114,7 +114,7 @@ struct cvmx_bootinfo {
/*
* flags indicating various configuration options. These
- * flags supercede the 'flags' variable and should be used
+ * flags supersede the 'flags' variable and should be used
* instead if available.
*/
uint32_t config_flags;
diff --git a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
index a07a36f7d814..67e1b2162b19 100644
--- a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
+++ b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
@@ -145,7 +145,7 @@ typedef struct {
/**
* This structure contains the global state of all command queues.
* It is stored in a bootmem named block and shared by all
- * applications running on Octeon. Tickets are stored in a differnet
+ * applications running on Octeon. Tickets are stored in a different
* cache line that queue information to reduce the contention on the
* ll/sc used to get a ticket. If this is not the case, the update
* of queue state causes the ll/sc to fail quite often.
@@ -172,7 +172,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id,
int pool_size);
/**
- * Shutdown a queue a free it's command buffers to the FPA. The
+ * Shutdown a queue and free its command buffers to the FPA. The
* hardware connected to the queue must be stopped before this
* function is called.
*
@@ -194,7 +194,7 @@ int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id);
/**
* Return the command buffer to be written to. The purpose of this
- * function is to allow CVMX routine access t othe low level buffer
+ * function is to allow CVMX routine access to the low level buffer
* for initial hardware setup. User applications should not call this
* function directly.
*
diff --git a/arch/mips/include/asm/octeon/cvmx-pko.h b/arch/mips/include/asm/octeon/cvmx-pko.h
index 5fec8476e421..f18a7f24daf8 100644
--- a/arch/mips/include/asm/octeon/cvmx-pko.h
+++ b/arch/mips/include/asm/octeon/cvmx-pko.h
@@ -91,7 +91,7 @@ typedef enum {
} cvmx_pko_status_t;
/**
- * This enumeration represents the differnet locking modes supported by PKO.
+ * This enumeration represents the different locking modes supported by PKO.
*/
typedef enum {
/*
diff --git a/arch/mips/include/asm/octeon/cvmx-pow.h b/arch/mips/include/asm/octeon/cvmx-pow.h
index a3b23811e0c3..21b4378244fa 100644
--- a/arch/mips/include/asm/octeon/cvmx-pow.h
+++ b/arch/mips/include/asm/octeon/cvmx-pow.h
@@ -1342,7 +1342,7 @@ static inline void cvmx_pow_tag_sw_wait(void)
* This function does NOT wait for previous tag switches to complete,
* so the caller must ensure that there is not a pending tag switch.
*
- * @wait: When set, call stalls until work becomes avaiable, or times out.
+ * @wait: When set, call stalls until work becomes available, or times out.
* If not set, returns immediately.
*
* Returns: the WQE pointer from POW. Returns NULL if no work
@@ -1376,7 +1376,7 @@ static inline struct cvmx_wqe *cvmx_pow_work_request_sync_nocheck(cvmx_pow_wait_
* This function waits for any previous tag switch to complete before
* requesting the new work.
*
- * @wait: When set, call stalls until work becomes avaiable, or times out.
+ * @wait: When set, call stalls until work becomes available, or times out.
* If not set, returns immediately.
*
* Returns: the WQE pointer from POW. Returns NULL if no work
diff --git a/arch/mips/include/asm/octeon/octeon-model.h b/arch/mips/include/asm/octeon/octeon-model.h
index 6c68517c2770..e53b61a8e32f 100644
--- a/arch/mips/include/asm/octeon/octeon-model.h
+++ b/arch/mips/include/asm/octeon/octeon-model.h
@@ -54,7 +54,7 @@
#define OM_CHECK_SUBMODEL 0x02000000
/* Match all models previous than the one specified */
#define OM_MATCH_PREVIOUS_MODELS 0x04000000
-/* Ignores the minor revison on newer parts */
+/* Ignores the minor revision on newer parts */
#define OM_IGNORE_MINOR_REVISION 0x08000000
#define OM_FLAG_MASK 0xff000000
@@ -226,7 +226,7 @@
#define OCTEON_CN52XX_PASS2 OCTEON_CN52XX_PASS2_X
/*
- * CN3XXX models with old revision enconding
+ * CN3XXX models with old revision encoding
*/
#define OCTEON_CN38XX_PASS1 0x000d0000
#define OCTEON_CN38XX_PASS2 0x000d0001
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 5978a8dfb917..ef9585d96f6b 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -173,7 +173,7 @@ static inline unsigned long ___pa(unsigned long x)
if (IS_ENABLED(CONFIG_64BIT)) {
/*
* For MIPS64 the virtual address may either be in one of
- * the compatibility segements ckseg0 or ckseg1, or it may
+ * the compatibility segments ckseg0 or ckseg1, or it may
* be in xkphys.
*/
return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 3fd6e22c108b..d993df6302dc 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -23,7 +23,7 @@
#ifdef CONFIG_PCI_DRIVERS_LEGACY
/*
- * Each pci channel is a top-level PCI bus seem by CPU. A machine with
+ * Each PCI channel is a top-level PCI bus seen by the CPU. A machine with
* multiple PCI channels may have multiple PCI host controllers or a
* single controller supporting multiple channels.
*/
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index 421e78c30253..088623ba7b8b 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -201,7 +201,7 @@ enum pgtable_bits {
* The final layouts of the PTE bits are:
*
* 64-bit, R1 or earlier: CCC D V G [S H] M A W R P
- * 32-bit, R1 or earler: CCC D V G M A W R P
+ * 32-bit, R1 or earlier: CCC D V G M A W R P
* 64-bit, R2 or later: CCC D V G RI/R XI [S H] M A W P
* 32-bit, R2 or later: CCC D V G RI/R XI M A W P
*/
diff --git a/arch/mips/include/asm/sgi/mc.h b/arch/mips/include/asm/sgi/mc.h
index 3a070cec97e7..5e96f9d32624 100644
--- a/arch/mips/include/asm/sgi/mc.h
+++ b/arch/mips/include/asm/sgi/mc.h
@@ -96,7 +96,7 @@ struct sgimc_regs {
volatile u32 lbursttp; /* Time period for long bursts */
/* MC chip can drive up to 4 bank 4 SIMMs each. All SIMMs in bank must
- * be the same size. The size encoding for supported SIMMs is bellow */
+ * be the same size. The size encoding for supported SIMMs is below */
u32 _unused11[9];
volatile u32 mconfig0; /* Memory config register zero */
u32 _unused12;
diff --git a/arch/mips/include/asm/sn/klconfig.h b/arch/mips/include/asm/sn/klconfig.h
index 117f85e4bef5..3d1670b3e052 100644
--- a/arch/mips/include/asm/sn/klconfig.h
+++ b/arch/mips/include/asm/sn/klconfig.h
@@ -851,7 +851,7 @@ typedef union kldev_s { /* for device structure allocation */
/*
* TBD - Allocation issues.
*
- * Do we need to Mark off sepatate heaps for lboard_t, rboard_t, component,
+ * Do we need to Mark off separate heaps for lboard_t, rboard_t, component,
* errinfo and allocate from them, or have a single heap and allocate all
* structures from it. Debug is easier in the former method since we can
* dump all similar structs in one command, but there will be lots of holes,
diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h
index aabd097933fe..44c04a82d0b7 100644
--- a/arch/mips/include/asm/sync.h
+++ b/arch/mips/include/asm/sync.h
@@ -19,7 +19,7 @@
*
* Ordering barriers can be more efficient than completion barriers, since:
*
- * a) Ordering barriers only require memory access instructions which preceed
+ * a) Ordering barriers only require memory access instructions which precede
* them in program order (older instructions) to reach a point in the
* load/store datapath beyond which reordering is not possible before
* allowing memory access instructions which follow them (younger
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index ecae7470faa4..b9d76e8ac5a2 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -27,7 +27,7 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long tp_value; /* thread pointer */
__u32 cpu; /* current CPU */
- int preempt_count; /* 0 => preemptable, <0 => BUG */
+ int preempt_count; /* 0 => preemptible, <0 => BUG */
struct pt_regs *regs;
long syscall; /* syscall number */
};
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
index 2e107886f97a..7ef06dcdc46e 100644
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h
@@ -46,7 +46,7 @@ typedef unsigned int cycles_t;
*
* There is a suggested workaround and also the erratum can't strike if
* the compare interrupt isn't being used as the clock source device.
- * However for now the implementaton of this function doesn't get these
+ * However for now the implementation of this function doesn't get these
* fine details right.
*/
static inline int can_use_mips_counter(unsigned int prid)
diff --git a/arch/mips/include/asm/vdso/vdso.h b/arch/mips/include/asm/vdso/vdso.h
index a327ca21270e..6cd88191fefa 100644
--- a/arch/mips/include/asm/vdso/vdso.h
+++ b/arch/mips/include/asm/vdso/vdso.h
@@ -32,7 +32,7 @@ static inline unsigned long get_vdso_base(void)
#else
/*
* Get the base load address of the VDSO. We have to avoid generating
- * relocations and references to the GOT because ld.so does not peform
+ * relocations and references to the GOT because ld.so does not perform
* relocations on the VDSO. We use the current offset from the VDSO base
* and perform a PC-relative branch which gives the absolute address in
* ra, and take the difference. The assembler chokes on
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index c6e1fc77c996..9c48d9a21aa0 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -88,7 +88,7 @@
#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
-#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
+#define MADV_DONTDUMP 16 /* Explicitly exclude from core dump,
overrides the coredump filter bits */
#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h
index 128af72f2dfe..d546642fc67e 100644
--- a/arch/mips/include/uapi/asm/msgbuf.h
+++ b/arch/mips/include/uapi/asm/msgbuf.h
@@ -62,7 +62,7 @@ struct msqid64_ds {
unsigned long __unused5;
};
#else
-#warning no endianess set
+#warning no endianness set
#endif
#endif /* _ASM_MSGBUF_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index de7460c3a72e..bda7f193baab 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1138,7 +1138,7 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
* This processor doesn't have an MMU, so it's not
* "real easy" to run Linux on it. It is left purely
* for documentation. Commented out because it shares
- * it's c0_prid id number with the TX3900.
+ * its c0_prid id number with the TX3900.
*/
c->cputype = CPU_R4650;
__cpu_name[cpu] = "R4650";
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 5582a4ca1e9e..7aa2c2360ff6 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -11,6 +11,7 @@
#include <asm/cpu-features.h>
#include <asm/cpu-info.h>
+#include <asm/fpu.h>
#ifdef CONFIG_MIPS_FP_SUPPORT
@@ -309,6 +310,11 @@ void mips_set_personality_nan(struct arch_elf_state *state)
struct cpuinfo_mips *c = &boot_cpu_data;
struct task_struct *t = current;
+ /* Do this early so t->thread.fpu.fcr31 won't be clobbered in case
+ * we are preempted before the lose_fpu(0) in start_thread.
+ */
+ lose_fpu(0);
+
t->thread.fpu.fcr31 = c->fpu_csr31;
switch (state->nan_2008) {
case 0:
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index b6de8e88c1bd..a572ce36a24f 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -272,18 +272,17 @@ NESTED(except_vec_vi, 0, sp)
.set push
.set noreorder
PTR_LA v1, except_vec_vi_handler
-FEXPORT(except_vec_vi_lui)
- lui v0, 0 /* Patched */
jr v1
FEXPORT(except_vec_vi_ori)
- ori v0, 0 /* Patched */
+ ori v0, zero, 0 /* Offset in vi_handlers[] */
.set pop
END(except_vec_vi)
EXPORT(except_vec_vi_end)
/*
* Common Vectored Interrupt code
- * Complete the register saves and invoke the handler which is passed in $v0
+ * Complete the register saves and invoke the handler; $v0 holds the
+ * offset into vi_handlers[]
*/
NESTED(except_vec_vi_handler, 0, sp)
SAVE_TEMP
@@ -331,6 +330,7 @@ NESTED(except_vec_vi_handler, 0, sp)
/* Save task's sp on IRQ stack so that unwinding can follow it */
LONG_S s1, 0(sp)
2:
+ PTR_L v0, vi_handlers(v0)
jalr v0
/* Restore sp */
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index 316b27d0d2fb..dc39f5b3fb83 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -55,7 +55,7 @@ NOKPROBE_SYMBOL(insn_has_delayslot);
* one; putting breakpoint on top of atomic ll/sc pair is bad idea;
* so we need to prevent it and refuse kprobes insertion for such
* instructions; cannot do much about breakpoint in the middle of
- * ll/sc pair; it is upto user to avoid those places
+ * ll/sc pair; it is up to user to avoid those places
*/
static int insn_has_ll_or_sc(union mips_instruction insn)
{
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index f88ce78e13e3..6062e6fa589a 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -28,6 +28,8 @@ __init void mips_set_machine_name(const char *name)
strscpy(mips_machine_name, name, sizeof(mips_machine_name));
pr_info("MIPS: machine is %s\n", mips_get_machine_name());
+
+ dump_stack_set_arch_desc(name);
}
char *mips_get_machine_name(void)
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c
index 58fc8d089402..7eeeaf1ff95d 100644
--- a/arch/mips/kernel/relocate.c
+++ b/arch/mips/kernel/relocate.c
@@ -380,7 +380,7 @@ void *__init relocate_kernel(void)
}
#endif /* CONFIG_USE_OF */
- /* Copy the kernel to it's new location */
+ /* Copy the kernel to its new location */
memcpy(loc_new, &_text, kernel_length);
/* Perform relocations on the new kernel */
diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S
index 8f0a7263a9d6..de894a0211d7 100644
--- a/arch/mips/kernel/relocate_kernel.S
+++ b/arch/mips/kernel/relocate_kernel.S
@@ -70,7 +70,7 @@ copy_word:
done:
#ifdef CONFIG_SMP
/* kexec_flag reset is signal to other CPUs what kernel
- was moved to it's location. Note - we need relocated address
+ was moved to its location. Note - we need relocated address
of kexec_flag. */
bal 1f
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 328426c3ed6f..9c30de151597 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -147,7 +147,7 @@ static unsigned long __init init_initrd(void)
/*
* Board specific code or command line parser should have
* already set up initrd_start and initrd_end. In these cases
- * perfom sanity checks and use them if all looks good.
+ * perform sanity checks and use them if all looks good.
*/
if (!initrd_start || initrd_end <= initrd_start)
goto disable;
@@ -322,11 +322,11 @@ static void __init bootmem_init(void)
panic("Incorrect memory mapping !!!");
if (max_pfn > PFN_DOWN(HIGHMEM_START)) {
+ max_low_pfn = PFN_DOWN(HIGHMEM_START);
#ifdef CONFIG_HIGHMEM
- highstart_pfn = PFN_DOWN(HIGHMEM_START);
+ highstart_pfn = max_low_pfn;
highend_pfn = max_pfn;
#else
- max_low_pfn = PFN_DOWN(HIGHMEM_START);
max_pfn = max_low_pfn;
#endif
}
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index ccbf580827f6..4a10f18a8806 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -570,7 +570,7 @@ void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
return (void __user __force *)(-1UL);
/*
- * FPU emulator may have it's own trampoline active just
+ * FPU emulator may have its own trampoline active just
* above the user stack, 16-bytes before the next lowest
* 16 byte boundary. Try to avoid trashing it.
*/
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c58c0c3c5b40..a1c1cb5de913 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64];
void reserve_exception_space(phys_addr_t addr, unsigned long size)
{
- memblock_reserve(addr, size);
+ /*
+ * Reserving exception space on CPUs other than CPU0 is
+ * too late, since memblock is no longer available by the
+ * time the APs are brought up.
+ */
+ if (smp_processor_id() == 0)
+ memblock_reserve(addr, size);
}
void __init *set_except_vector(int n, void *addr)
@@ -2055,108 +2061,71 @@ static void do_default_vi(void)
panic("Caught unexpected vectored interrupt.");
}
-static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs)
+void *set_vi_handler(int n, vi_handler_t addr)
{
+ extern const u8 except_vec_vi[];
+ extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
+ extern const u8 rollback_except_vec_vi[];
unsigned long handler;
unsigned long old_handler = vi_handlers[n];
int srssets = current_cpu_data.srsets;
u16 *h;
unsigned char *b;
+ const u8 *vec_start;
+ int ori_offset;
+ int handler_len;
BUG_ON(!cpu_has_veic && !cpu_has_vint);
if (addr == NULL) {
handler = (unsigned long) do_default_vi;
- srs = 0;
} else
handler = (unsigned long) addr;
vi_handlers[n] = handler;
b = (unsigned char *)(ebase + 0x200 + n*VECTORSPACING);
- if (srs >= srssets)
- panic("Shadow register set %d not supported", srs);
-
if (cpu_has_veic) {
if (board_bind_eic_interrupt)
- board_bind_eic_interrupt(n, srs);
+ board_bind_eic_interrupt(n, 0);
} else if (cpu_has_vint) {
/* SRSMap is only defined if shadow sets are implemented */
if (srssets > 1)
- change_c0_srsmap(0xf << n*4, srs << n*4);
+ change_c0_srsmap(0xf << n*4, 0 << n*4);
}
- if (srs == 0) {
- /*
- * If no shadow set is selected then use the default handler
- * that does normal register saving and standard interrupt exit
- */
- extern const u8 except_vec_vi[], except_vec_vi_lui[];
- extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
- extern const u8 rollback_except_vec_vi[];
- const u8 *vec_start = using_rollback_handler() ?
- rollback_except_vec_vi : except_vec_vi;
+ vec_start = using_rollback_handler() ? rollback_except_vec_vi :
+ except_vec_vi;
#if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
- const int lui_offset = except_vec_vi_lui - vec_start + 2;
- const int ori_offset = except_vec_vi_ori - vec_start + 2;
+ ori_offset = except_vec_vi_ori - vec_start + 2;
#else
- const int lui_offset = except_vec_vi_lui - vec_start;
- const int ori_offset = except_vec_vi_ori - vec_start;
+ ori_offset = except_vec_vi_ori - vec_start;
#endif
- const int handler_len = except_vec_vi_end - vec_start;
+ handler_len = except_vec_vi_end - vec_start;
- if (handler_len > VECTORSPACING) {
- /*
- * Sigh... panicing won't help as the console
- * is probably not configured :(
- */
- panic("VECTORSPACING too small");
- }
-
- set_handler(((unsigned long)b - ebase), vec_start,
-#ifdef CONFIG_CPU_MICROMIPS
- (handler_len - 1));
-#else
- handler_len);
-#endif
- h = (u16 *)(b + lui_offset);
- *h = (handler >> 16) & 0xffff;
- h = (u16 *)(b + ori_offset);
- *h = (handler & 0xffff);
- local_flush_icache_range((unsigned long)b,
- (unsigned long)(b+handler_len));
- }
- else {
+ if (handler_len > VECTORSPACING) {
/*
- * In other cases jump directly to the interrupt handler. It
- * is the handler's responsibility to save registers if required
- * (eg hi/lo) and return from the exception using "eret".
+ * Sigh... panicing won't help as the console
+ * is probably not configured :(
*/
- u32 insn;
+ panic("VECTORSPACING too small");
+ }
- h = (u16 *)b;
- /* j handler */
+ set_handler(((unsigned long)b - ebase), vec_start,
#ifdef CONFIG_CPU_MICROMIPS
- insn = 0xd4000000 | (((u32)handler & 0x07ffffff) >> 1);
+ (handler_len - 1));
#else
- insn = 0x08000000 | (((u32)handler & 0x0fffffff) >> 2);
+ handler_len);
#endif
- h[0] = (insn >> 16) & 0xffff;
- h[1] = insn & 0xffff;
- h[2] = 0;
- h[3] = 0;
- local_flush_icache_range((unsigned long)b,
- (unsigned long)(b+8));
- }
+ /* insert offset into vi_handlers[] */
+ h = (u16 *)(b + ori_offset);
+ *h = n * sizeof(handler);
+ local_flush_icache_range((unsigned long)b,
+ (unsigned long)(b+handler_len));
return (void *)old_handler;
}
-void *set_vi_handler(int n, vi_handler_t addr)
-{
- return set_vi_srs_handler(n, addr, 0);
-}
-
/*
* Timer interrupt
*/
@@ -2416,7 +2385,7 @@ void __init trap_init(void)
set_except_vector(i, handle_reserved);
/*
- * Copy the EJTAG debug exception vector handler code to it's final
+ * Copy the EJTAG debug exception vector handler code to its final
* destination.
*/
if (cpu_has_ejtag && board_ejtag_handler_setup)
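The reworked vectored-interrupt dispatch can be sketched in C (a simplified model of the genex.S and traps.c hunks above, not literal kernel code): the stub's single ori immediate is patched with the byte offset of the vector's slot in vi_handlers[], and the common handler loads the function pointer at exception time instead of having the handler address patched in with lui/ori:

	/* patching side, for vector n */
	h = (u16 *)(b + ori_offset);
	*h = n * sizeof(vi_handlers[0]);                 /* immediate of "ori v0, zero, IMM" */

	/* exception side, what the stub plus common code effectively do */
	void (*handler)(void) = (void *)vi_handlers[n];  /* PTR_L v0, vi_handlers(v0) */
	handler();                                       /* jalr v0 */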
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index e9a0cfd02ae2..737d0d4fdcd3 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -6,9 +6,9 @@
* Copyright (C) 2004, 2005 MIPS Technologies, Inc. All rights reserved.
* Copyright (C) 2013 Imagination Technologies Ltd.
*
- * VPE spport module for loading a MIPS SP program into VPE1. The SP
+ * VPE support module for loading a MIPS SP program into VPE1. The SP
* environment is rather simple since there are no TLBs. It needs
- * to be relocatable (or partiall linked). Initialize your stack in
+ * to be relocatable (or partially linked). Initialize your stack in
* the startup-code. The loader looks for the symbol __start and sets
* up the execution to resume from there. To load and run, simply do
* a cat SP 'binary' to the /dev/vpe1 device.
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index a8cdba75f98d..18e7a17d5115 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -20,13 +20,11 @@ config KVM
depends on HAVE_KVM
depends on MIPS_FP_SUPPORT
select EXPORT_UASM
- select PREEMPT_NOTIFIERS
+ select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
- select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_MMIO
- select MMU_NOTIFIER
- select INTERVAL_TREE
+ select KVM_GENERIC_MMU_NOTIFIER
select KVM_GENERIC_HARDWARE_ENABLING
help
Support for hosting Guest kernels.
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index e64372b8f66a..0feec52222fb 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -531,7 +531,7 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
* to be used for a period of time, but the exact ktime corresponding to the
* final Count that must be restored is not known.
*
- * It is gauranteed that a timer interrupt immediately after restore will be
+ * It is guaranteed that a timer interrupt immediately after restore will be
* handled, but not if CP0_Compare is exactly at @count. That case should
* already be handled when the hardware timer state is saved.
*
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
index a3cf29365858..0c45767eacf6 100644
--- a/arch/mips/lantiq/prom.c
+++ b/arch/mips/lantiq/prom.c
@@ -108,10 +108,9 @@ void __init prom_init(void)
prom_init_cmdline();
#if defined(CONFIG_MIPS_MT_SMP)
- if (cpu_has_mipsmt) {
- lantiq_smp_ops = vsmp_smp_ops;
+ lantiq_smp_ops = vsmp_smp_ops;
+ if (cpu_has_mipsmt)
lantiq_smp_ops.init_secondary = lantiq_init_secondary;
- register_smp_ops(&lantiq_smp_ops);
- }
+ register_smp_ops(&lantiq_smp_ops);
#endif
}
diff --git a/arch/mips/loongson2ef/common/platform.c b/arch/mips/loongson2ef/common/platform.c
index 0084820cffaa..b10300a527af 100644
--- a/arch/mips/loongson2ef/common/platform.c
+++ b/arch/mips/loongson2ef/common/platform.c
@@ -17,7 +17,7 @@ static int __init loongson2_cpufreq_init(void)
{
struct cpuinfo_mips *c = &current_cpu_data;
- /* Only 2F revision and it's successors support CPUFreq */
+ /* Only 2F revision and its successors support CPUFreq */
if ((c->processor_id & PRID_REV_MASK) >= PRID_REV_LOONGSON2F)
return platform_device_register(&loongson2_cpufreq_device);
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index f25caa6aa9d3..553142c1f14f 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -103,6 +103,9 @@ void __init szmem(unsigned int node)
if (loongson_sysconf.vgabios_addr)
memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr),
SZ_256K);
+ /* set nid for reserved memory */
+ memblock_set_node((u64)node << 44, (u64)(node + 1) << 44,
+ &memblock.reserved, node);
}
#ifndef CONFIG_NUMA
diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c
index 8f61e93c0c5b..68dafd6d3e25 100644
--- a/arch/mips/loongson64/numa.c
+++ b/arch/mips/loongson64/numa.c
@@ -132,6 +132,8 @@ static void __init node_mem_init(unsigned int node)
/* Reserve pfn range 0~node[0]->node_start_pfn */
memblock_reserve(0, PAGE_SIZE * start_pfn);
+ /* set nid for reserved memory on node 0 */
+ memblock_set_node(0, 1ULL << 44, &memblock.reserved, 0);
}
}
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index 498bdc1bb0ed..5a990cdef91a 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -516,7 +516,7 @@ static void __init loongson3_prepare_cpus(unsigned int max_cpus)
}
/*
- * Setup the PC, SP, and GP of a secondary processor and start it runing!
+ * Setup the PC, SP, and GP of a secondary processor and start it running!
*/
static int loongson3_boot_secondary(int cpu, struct task_struct *idle)
{
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index b45bf026ee55..10413b6f6662 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1650,7 +1650,7 @@ static void coherency_setup(void)
/*
* c0_status.cu=0 specifies that updates by the sc instruction use
- * the coherency mode specified by the TLB; 1 means cachable
+ * the coherency mode specified by the TLB; 1 means cacheable
* coherent update on write will be used. Not all processors have
* this bit and; some wire it to zero, others like Toshiba had the
* silly idea of putting something else there ...
diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S
index 45dff5cd4b8e..e528583d1331 100644
--- a/arch/mips/mm/cex-gen.S
+++ b/arch/mips/mm/cex-gen.S
@@ -25,7 +25,7 @@
* This is a very bad place to be. Our cache error
* detection has triggered. If we have write-back data
* in the cache, we may not be able to recover. As a
- * first-order desperate measure, turn off KSEG0 cacheing.
+ * first-order desperate measure, turn off KSEG0 caching.
*/
mfc0 k0,CP0_CONFIG
li k1,~CONF_CM_CMASK
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index 3c4fc97b9f39..0f3cec663a12 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -138,7 +138,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
dev->dma_coherent = coherent;
}
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index c2e0e5aebe90..39f129205b0c 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -422,8 +422,17 @@ void __init paging_init(void)
" %ldk highmem ignored\n",
(highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10));
max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn;
+
+ max_mapnr = max_low_pfn;
+ } else if (highend_pfn) {
+ max_mapnr = highend_pfn;
+ } else {
+ max_mapnr = max_low_pfn;
}
+#else
+ max_mapnr = max_low_pfn;
#endif
+ high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
free_area_init(max_zone_pfns);
}
@@ -459,13 +468,6 @@ void __init mem_init(void)
*/
BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));
-#ifdef CONFIG_HIGHMEM
- max_mapnr = highend_pfn ? highend_pfn : max_low_pfn;
-#else
- max_mapnr = max_low_pfn;
-#endif
- high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
-
maar_init();
memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index b6dad2fd5575..d8243d61ef32 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -72,6 +72,10 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size,
flags == _CACHE_UNCACHED)
return (void __iomem *) CKSEG1ADDR(phys_addr);
+ /* Early remaps should use the unmapped regions until VM is available */
+ if (WARN_ON_ONCE(!slab_is_available()))
+ return NULL;
+
/*
* Don't allow anybody to remap RAM that may be allocated by the page
* allocator, since that could lead to races & data clobbering.
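The effect of the new guard, stated as a caller-side sketch (hedged, not taken from the patch; phys_addr and size are placeholders): before slab_is_available(), ioremap() on MIPS only succeeds for addresses served by the unmapped-segment early return above, and anything that would need a page-table mapping gets NULL plus a one-time warning rather than an attempt to build a mapping without a working VM layer:

	void __iomem *regs = ioremap(phys_addr, size);
	if (!regs)              /* now possible during early boot */
		return -ENOMEM;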
diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
index f6db65410c65..173f7b36033b 100644
--- a/arch/mips/mm/tlb-r3k.c
+++ b/arch/mips/mm/tlb-r3k.c
@@ -183,7 +183,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
int idx, pid;
/*
- * Handle debugger faulting in for debugee.
+ * Handle debugger faulting in for debuggee.
*/
if (current->active_mm != vma->vm_mm)
return;
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 7e2a0011a6fb..4106084e57d7 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -301,7 +301,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
int idx, pid;
/*
- * Handle debugger faulting in for debugee.
+ * Handle debugger faulting in for debuggee.
*/
if (current->active_mm != vma->vm_mm)
return;
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index b4e1c783e617..4017fa0e2f68 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -789,7 +789,7 @@ void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
if (check_for_high_segbits) {
/*
- * The kernel currently implicitely assumes that the
+ * The kernel currently implicitly assumes that the
* MIPS SEGBITS parameter for the processor is
* (PGDIR_SHIFT+PGDIR_BITS) or less, and will never
* allocate virtual addresses outside the maximum
@@ -1715,7 +1715,7 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr,
/*
* Check if PTE is present, if not then jump to LABEL. PTR points to
* the page table where this PTE is located, PTE will be re-loaded
- * with it's original value.
+ * with its original value.
*/
static void
build_pte_present(u32 **p, struct uasm_reloc **r,
diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c
index ace5db3fbd17..40a878b672f5 100644
--- a/arch/mips/net/bpf_jit_comp32.c
+++ b/arch/mips/net/bpf_jit_comp32.c
@@ -95,7 +95,7 @@
/*
* Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
*
- * 1) Native register pairs are ordered according to CPU endiannes, following
+ * 1) Native register pairs are ordered according to CPU endianness, following
* the MIPS convention for passing 64-bit arguments and return values.
* 2) The eBPF return value, arguments and callee-saved registers are mapped
* to their native MIPS equivalents.
diff --git a/arch/mips/pci/ops-loongson2.c b/arch/mips/pci/ops-loongson2.c
index 0d1b36ba1c21..068113f5c49d 100644
--- a/arch/mips/pci/ops-loongson2.c
+++ b/arch/mips/pci/ops-loongson2.c
@@ -49,7 +49,7 @@ static int loongson_pcibios_config_access(unsigned char access_type,
*/
#ifdef CONFIG_CS5536
/* cs5536_pci_conf_read4/write4() will call _rdmsr/_wrmsr() to
- * access the regsters PCI_MSR_ADDR, PCI_MSR_DATA_LO,
+ * access the registers PCI_MSR_ADDR, PCI_MSR_DATA_LO,
* PCI_MSR_DATA_HI, which is bigger than PCI_MSR_CTRL, so, it
* will not go this branch, but the others. so, no calling dead
* loop here.
diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c
index 1c722dd0c130..58625d1b6465 100644
--- a/arch/mips/pci/pci-alchemy.c
+++ b/arch/mips/pci/pci-alchemy.c
@@ -453,7 +453,7 @@ static int alchemy_pci_probe(struct platform_device *pdev)
/* we can't ioremap the entire pci config space because it's too large,
* nor can we dynamically ioremap it because some drivers use the
- * PCI config routines from within atomic contex and that becomes a
+ * PCI config routines from within atomic context and that becomes a
* problem in get_vm_area(). Instead we use one wired TLB entry to
* handle all config accesses for all busses.
*/
diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c
index e17d862cfa4c..a925842ee125 100644
--- a/arch/mips/pci/pci-ar2315.c
+++ b/arch/mips/pci/pci-ar2315.c
@@ -16,7 +16,7 @@
* the CFG_SEL bit in the PCI_MISC_CONFIG register.
*
* Devices on the bus can perform DMA requests via chip BAR1. PCI host
- * controller BARs are programmend as if an external device is programmed.
+ * controller BARs are programmed as if an external device is programmed.
* Which means that during configuration, IDSEL pin of the chip should be
* asserted.
*
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 80f7293166bb..68a8cefed420 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -152,7 +152,7 @@ static int ltq_pci_startup(struct platform_device *pdev)
temp_buffer &= ~0xf0000;
/* enable internal arbiter */
temp_buffer |= (1 << INTERNAL_ARB_ENABLE_BIT);
- /* enable internal PCI master reqest */
+ /* enable internal PCI master request */
temp_buffer &= (~(3 << PCI_MASTER0_REQ_MASK_2BITS));
/* enable EBU request */
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index d19d9d456309..36d12cea3512 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -376,7 +376,7 @@ static void octeon_pci_initialize(void)
ctl_status.s.timer = 1;
cvmx_write_csr(CVMX_NPI_CTL_STATUS, ctl_status.u64);
- /* Deassert PCI reset and advertize PCX Host Mode Device Capability
+ /* Deassert PCI reset and advertise PCX Host Mode Device Capability
(64b) */
cvmx_write_csr(CVMX_CIU_SOFT_PRST, 0x4);
cvmx_read_csr(CVMX_CIU_SOFT_PRST);
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
index 68d5211afea8..45ddbaa6c123 100644
--- a/arch/mips/pci/pci-xtalk-bridge.c
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -114,7 +114,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC3,
*
* The function is complicated by the ultimate brokenness of the IOC3 chip
* which is used in SGI systems. The IOC3 can only handle 32-bit PCI
- * accesses and does only decode parts of it's address space.
+ * accesses and does only decode parts of its address space.
*/
static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
int where, int size, u32 *value)
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index c9edd3fb380d..2583e318e8c6 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -1037,7 +1037,7 @@ retry:
in_fif_p_count = dbg_data.s.data & 0xff;
} while (in_fif_p_count != ((old_in_fif_p_count+1) & 0xff));
- /* Update in_fif_p_count for it's offset with respect to out_p_count */
+ /* Update in_fif_p_count for its offset with respect to out_p_count */
in_fif_p_count = (in_fif_p_count + in_p_offset) & 0xff;
/* Read the OUT_P_COUNT from the debug select */
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
index 137781d0bd0a..5a9fd3fe41d7 100644
--- a/arch/mips/ralink/mt7621.c
+++ b/arch/mips/ralink/mt7621.c
@@ -175,7 +175,7 @@ void __init prom_soc_init(struct ralink_soc_info *soc_info)
* mips_cm_probe() wipes out bootloader
* config for CM regions and we have to configure them
* again. This SoC cannot talk to pamlbus devices
- * witout proper iocu region set up.
+ * without proper iocu region set up.
*
* FIXME: it would be better to do this with values
* from DT, but we need this very early because
diff --git a/arch/mips/sgi-ip27/Makefile b/arch/mips/sgi-ip27/Makefile
index 27c14ede191e..9877fcc512b1 100644
--- a/arch/mips/sgi-ip27/Makefile
+++ b/arch/mips/sgi-ip27/Makefile
@@ -5,7 +5,7 @@
obj-y := ip27-berr.o ip27-irq.o ip27-init.o ip27-klconfig.o \
ip27-klnuma.o ip27-memory.o ip27-nmi.o ip27-reset.o ip27-timer.o \
- ip27-hubio.o ip27-xtalk.o
+ ip27-xtalk.o
obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o
obj-$(CONFIG_SMP) += ip27-smp.o
diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c
index 923a63a51cda..9eb497cb5d52 100644
--- a/arch/mips/sgi-ip27/ip27-berr.c
+++ b/arch/mips/sgi-ip27/ip27-berr.c
@@ -22,6 +22,8 @@
#include <asm/traps.h>
#include <linux/uaccess.h>
+#include "ip27-common.h"
+
static void dump_hub_information(unsigned long errst0, unsigned long errst1)
{
static char *err_type[2][8] = {
@@ -57,7 +59,7 @@ static void dump_hub_information(unsigned long errst0, unsigned long errst1)
[st0.pi_stat0_fmt.s0_err_type] ? : "invalid");
}
-int ip27_be_handler(struct pt_regs *regs, int is_fixup)
+static int ip27_be_handler(struct pt_regs *regs, int is_fixup)
{
unsigned long errst0, errst1;
int data = regs->cp0_cause & 4;
diff --git a/arch/mips/sgi-ip27/ip27-common.h b/arch/mips/sgi-ip27/ip27-common.h
index ed008a08464c..a0059fa13934 100644
--- a/arch/mips/sgi-ip27/ip27-common.h
+++ b/arch/mips/sgi-ip27/ip27-common.h
@@ -10,6 +10,7 @@ extern void hub_rt_clock_event_init(void);
extern void hub_rtc_init(nasid_t nasid);
extern void install_cpu_nmi_handler(int slice);
extern void install_ipi(void);
+extern void ip27_be_init(void);
extern void ip27_reboot_setup(void);
extern const struct plat_smp_ops ip27_smp_ops;
extern unsigned long node_getfirstfree(nasid_t nasid);
@@ -17,4 +18,5 @@ extern void per_cpu_init(void);
extern void replicate_kernel_text(void);
extern void setup_replication_mask(void);
+
#endif /* __IP27_COMMON_H */
diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
deleted file mode 100644
index 8352eb6403b4..000000000000
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 1992-1997, 2000-2003 Silicon Graphics, Inc.
- * Copyright (C) 2004 Christoph Hellwig.
- *
- * Support functions for the HUB ASIC - mostly PIO mapping related.
- */
-
-#include <linux/bitops.h>
-#include <linux/string.h>
-#include <linux/mmzone.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/arch.h>
-#include <asm/sn/agent.h>
-#include <asm/sn/io.h>
-#include <asm/xtalk/xtalk.h>
-
-
-static int force_fire_and_forget = 1;
-
-/**
- * hub_pio_map - establish a HUB PIO mapping
- *
- * @hub: hub to perform PIO mapping on
- * @widget: widget ID to perform PIO mapping for
- * @xtalk_addr: xtalk_address that needs to be mapped
- * @size: size of the PIO mapping
- *
- **/
-unsigned long hub_pio_map(nasid_t nasid, xwidgetnum_t widget,
- unsigned long xtalk_addr, size_t size)
-{
- unsigned i;
-
- /* use small-window mapping if possible */
- if ((xtalk_addr % SWIN_SIZE) + size <= SWIN_SIZE)
- return NODE_SWIN_BASE(nasid, widget) + (xtalk_addr % SWIN_SIZE);
-
- if ((xtalk_addr % BWIN_SIZE) + size > BWIN_SIZE) {
- printk(KERN_WARNING "PIO mapping at hub %d widget %d addr 0x%lx"
- " too big (%ld)\n",
- nasid, widget, xtalk_addr, size);
- return 0;
- }
-
- xtalk_addr &= ~(BWIN_SIZE-1);
- for (i = 0; i < HUB_NUM_BIG_WINDOW; i++) {
- if (test_and_set_bit(i, hub_data(nasid)->h_bigwin_used))
- continue;
-
- /*
- * The code below does a PIO write to setup an ITTE entry.
- *
- * We need to prevent other CPUs from seeing our updated
- * memory shadow of the ITTE (in the piomap) until the ITTE
- * entry is actually set up; otherwise, another CPU might
- * attempt a PIO prematurely.
- *
- * Also, the only way we can know that an entry has been
- * received by the hub and can be used by future PIO reads/
- * writes is by reading back the ITTE entry after writing it.
- *
- * For these two reasons, we PIO read back the ITTE entry
- * after we write it.
- */
- IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr);
- __raw_readq(IIO_ITTE_GET(nasid, i));
-
- return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE);
- }
-
- printk(KERN_WARNING "unable to establish PIO mapping for at"
- " hub %d widget %d addr 0x%lx\n",
- nasid, widget, xtalk_addr);
- return 0;
-}
-
-
-/*
- * hub_setup_prb(nasid, prbnum, credits, conveyor)
- *
- * Put a PRB into fire-and-forget mode if conveyor isn't set. Otherwise,
- * put it into conveyor belt mode with the specified number of credits.
- */
-static void hub_setup_prb(nasid_t nasid, int prbnum, int credits)
-{
- union iprb_u prb;
- int prb_offset;
-
- /*
- * Get the current register value.
- */
- prb_offset = IIO_IOPRB(prbnum);
- prb.iprb_regval = REMOTE_HUB_L(nasid, prb_offset);
-
- /*
- * Clear out some fields.
- */
- prb.iprb_ovflow = 1;
- prb.iprb_bnakctr = 0;
- prb.iprb_anakctr = 0;
-
- /*
- * Enable or disable fire-and-forget mode.
- */
- prb.iprb_ff = force_fire_and_forget ? 1 : 0;
-
- /*
- * Set the appropriate number of PIO credits for the widget.
- */
- prb.iprb_xtalkctr = credits;
-
- /*
- * Store the new value to the register.
- */
- REMOTE_HUB_S(nasid, prb_offset, prb.iprb_regval);
-}
-
-/**
- * hub_set_piomode - set pio mode for a given hub
- *
- * @nasid: physical node ID for the hub in question
- *
- * Put the hub into either "PIO conveyor belt" mode or "fire-and-forget" mode.
- * To do this, we have to make absolutely sure that no PIOs are in progress
- * so we turn off access to all widgets for the duration of the function.
- *
- * XXX - This code should really check what kind of widget we're talking
- * to. Bridges can only handle three requests, but XG will do more.
- * How many can crossbow handle to widget 0? We're assuming 1.
- *
- * XXX - There is a bug in the crossbow that link reset PIOs do not
- * return write responses. The easiest solution to this problem is to
- * leave widget 0 (xbow) in fire-and-forget mode at all times. This
- * only affects pio's to xbow registers, which should be rare.
- **/
-static void hub_set_piomode(nasid_t nasid)
-{
- u64 ii_iowa;
- union hubii_wcr_u ii_wcr;
- unsigned i;
-
- ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS);
- REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, 0);
-
- ii_wcr.wcr_reg_value = REMOTE_HUB_L(nasid, IIO_WCR);
-
- if (ii_wcr.iwcr_dir_con) {
- /*
- * Assume a bridge here.
- */
- hub_setup_prb(nasid, 0, 3);
- } else {
- /*
- * Assume a crossbow here.
- */
- hub_setup_prb(nasid, 0, 1);
- }
-
- /*
- * XXX - Here's where we should take the widget type into
- * when account assigning credits.
- */
- for (i = HUB_WIDGET_ID_MIN; i <= HUB_WIDGET_ID_MAX; i++)
- hub_setup_prb(nasid, i, 3);
-
- REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, ii_iowa);
-}
-
-/*
- * hub_pio_init - PIO-related hub initialization
- *
- * @hub: hubinfo structure for our hub
- */
-void hub_pio_init(nasid_t nasid)
-{
- unsigned i;
-
- /* initialize big window piomaps for this hub */
- bitmap_zero(hub_data(nasid)->h_bigwin_used, HUB_NUM_BIG_WINDOW);
- for (i = 0; i < HUB_NUM_BIG_WINDOW; i++)
- IIO_ITTE_DISABLE(nasid, i);
-
- hub_set_piomode(nasid);
-}
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index a0dd3bd2b81b..8f5299b269e7 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -23,6 +23,8 @@
#include <asm/sn/intr.h>
#include <asm/sn/irq_alloc.h>
+#include "ip27-common.h"
+
struct hub_irq_data {
u64 *irq_mask[2];
cpuid_t cpu;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index f79c48393716..b8ca94cfb4fe 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -23,6 +23,7 @@
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
+#include <asm/sgialib.h>
#include <asm/sn/arch.h>
#include <asm/sn/agent.h>
diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c
index 84889b57d5ff..fc2816398d0c 100644
--- a/arch/mips/sgi-ip27/ip27-nmi.c
+++ b/arch/mips/sgi-ip27/ip27-nmi.c
@@ -11,6 +11,8 @@
#include <asm/sn/arch.h>
#include <asm/sn/agent.h>
+#include "ip27-common.h"
+
#if 0
#define NODE_NUM_CPUS(n) CNODE_NUM_CPUS(n)
#else
@@ -23,16 +25,7 @@
typedef unsigned long machreg_t;
static arch_spinlock_t nmi_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-
-/*
- * Let's see what else we need to do here. Set up sp, gp?
- */
-void nmi_dump(void)
-{
- void cont_nmi_dump(void);
-
- cont_nmi_dump();
-}
+static void nmi_dump(void);
void install_cpu_nmi_handler(int slice)
{
@@ -53,7 +46,7 @@ void install_cpu_nmi_handler(int slice)
* into the eframe format for the node under consideration.
*/
-void nmi_cpu_eframe_save(nasid_t nasid, int slice)
+static void nmi_cpu_eframe_save(nasid_t nasid, int slice)
{
struct reg_struct *nr;
int i;
@@ -129,7 +122,7 @@ void nmi_cpu_eframe_save(nasid_t nasid, int slice)
pr_emerg("\n");
}
-void nmi_dump_hub_irq(nasid_t nasid, int slice)
+static void nmi_dump_hub_irq(nasid_t nasid, int slice)
{
u64 mask0, mask1, pend0, pend1;
@@ -153,7 +146,7 @@ void nmi_dump_hub_irq(nasid_t nasid, int slice)
* Copy the cpu registers which have been saved in the IP27prom format
* into the eframe format for the node under consideration.
*/
-void nmi_node_eframe_save(nasid_t nasid)
+static void nmi_node_eframe_save(nasid_t nasid)
{
int slice;
@@ -170,8 +163,7 @@ void nmi_node_eframe_save(nasid_t nasid)
/*
* Save the nmi cpu registers for all cpus in the system.
*/
-void
-nmi_eframes_save(void)
+static void nmi_eframes_save(void)
{
nasid_t nasid;
@@ -179,8 +171,7 @@ nmi_eframes_save(void)
nmi_node_eframe_save(nasid);
}
-void
-cont_nmi_dump(void)
+static void nmi_dump(void)
{
#ifndef REAL_NMI_SIGNAL
static atomic_t nmied_cpus = ATOMIC_INIT(0);
diff --git a/arch/mips/sgi-ip30/ip30-console.c b/arch/mips/sgi-ip30/ip30-console.c
index b91f8c4fdc78..7c6dcf6e73f7 100644
--- a/arch/mips/sgi-ip30/ip30-console.c
+++ b/arch/mips/sgi-ip30/ip30-console.c
@@ -3,6 +3,7 @@
#include <linux/io.h>
#include <asm/sn/ioc3.h>
+#include <asm/setup.h>
static inline struct ioc3_uartregs *console_uart(void)
{
diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c
index 75a34684e704..e8547636a748 100644
--- a/arch/mips/sgi-ip30/ip30-setup.c
+++ b/arch/mips/sgi-ip30/ip30-setup.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/memblock.h>
+#include <asm/bootinfo.h>
#include <asm/smp-ops.h>
#include <asm/sgialib.h>
#include <asm/time.h>
diff --git a/arch/mips/sgi-ip32/crime.c b/arch/mips/sgi-ip32/crime.c
index a8e0c776ca6c..b8a0e4cfa9ce 100644
--- a/arch/mips/sgi-ip32/crime.c
+++ b/arch/mips/sgi-ip32/crime.c
@@ -18,6 +18,8 @@
#include <asm/ip32/crime.h>
#include <asm/ip32/mace.h>
+#include "ip32-common.h"
+
struct sgi_crime __iomem *crime;
struct sgi_mace __iomem *mace;
@@ -39,7 +41,7 @@ void __init crime_init(void)
id, rev, field, (unsigned long) CRIME_BASE);
}
-irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id)
+irqreturn_t crime_memerr_intr(int irq, void *dev_id)
{
unsigned long stat, addr;
int fatal = 0;
@@ -90,7 +92,7 @@ irqreturn_t crime_memerr_intr(unsigned int irq, void *dev_id)
return IRQ_HANDLED;
}
-irqreturn_t crime_cpuerr_intr(unsigned int irq, void *dev_id)
+irqreturn_t crime_cpuerr_intr(int irq, void *dev_id)
{
unsigned long stat = crime->cpu_error_stat & CRIME_CPU_ERROR_MASK;
unsigned long addr = crime->cpu_error_addr & CRIME_CPU_ERROR_ADDR_MASK;
diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c
index 478b63b4c808..7cbc27941f92 100644
--- a/arch/mips/sgi-ip32/ip32-berr.c
+++ b/arch/mips/sgi-ip32/ip32-berr.c
@@ -18,6 +18,8 @@
#include <asm/ptrace.h>
#include <asm/tlbdebug.h>
+#include "ip32-common.h"
+
static int ip32_be_handler(struct pt_regs *regs, int is_fixup)
{
int data = regs->cp0_cause & 4;
diff --git a/arch/mips/sgi-ip32/ip32-common.h b/arch/mips/sgi-ip32/ip32-common.h
new file mode 100644
index 000000000000..cfc0225b1419
--- /dev/null
+++ b/arch/mips/sgi-ip32/ip32-common.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IP32_COMMON_H
+#define __IP32_COMMON_H
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+
+void __init crime_init(void);
+irqreturn_t crime_memerr_intr(int irq, void *dev_id);
+irqreturn_t crime_cpuerr_intr(int irq, void *dev_id);
+void __init ip32_be_init(void);
+void ip32_prepare_poweroff(void);
+
+#endif /* __IP32_COMMON_H */
diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c
index e21ea1de05e3..29d04468a06b 100644
--- a/arch/mips/sgi-ip32/ip32-irq.c
+++ b/arch/mips/sgi-ip32/ip32-irq.c
@@ -28,6 +28,8 @@
#include <asm/ip32/mace.h>
#include <asm/ip32/ip32_ints.h>
+#include "ip32-common.h"
+
/* issue a PIO read to make sure no PIO writes are pending */
static inline void flush_crime_bus(void)
{
@@ -107,10 +109,6 @@ static inline void flush_mace_bus(void)
* is quite different anyway.
*/
-/* Some initial interrupts to set up */
-extern irqreturn_t crime_memerr_intr(int irq, void *dev_id);
-extern irqreturn_t crime_cpuerr_intr(int irq, void *dev_id);
-
/*
* This is for pure CRIME interrupts - ie not MACE. The advantage?
* We get to split the register in half and do faster lookups.
diff --git a/arch/mips/sgi-ip32/ip32-memory.c b/arch/mips/sgi-ip32/ip32-memory.c
index 3fc8d0a0bdfa..5fee33744f67 100644
--- a/arch/mips/sgi-ip32/ip32-memory.c
+++ b/arch/mips/sgi-ip32/ip32-memory.c
@@ -15,6 +15,7 @@
#include <asm/ip32/crime.h>
#include <asm/bootinfo.h>
#include <asm/page.h>
+#include <asm/sgialib.h>
extern void crime_init(void);
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 18d1c115cd53..6bdc1421cda4 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -29,6 +29,8 @@
#include <asm/ip32/crime.h>
#include <asm/ip32/ip32_ints.h>
+#include "ip32-common.h"
+
#define POWERDOWN_TIMEOUT 120
/*
* Blink frequency during reboot grace period and when panicked.
diff --git a/arch/mips/sgi-ip32/ip32-setup.c b/arch/mips/sgi-ip32/ip32-setup.c
index 8019dae1721a..aeb0805aae57 100644
--- a/arch/mips/sgi-ip32/ip32-setup.c
+++ b/arch/mips/sgi-ip32/ip32-setup.c
@@ -26,8 +26,7 @@
#include <asm/ip32/mace.h>
#include <asm/ip32/ip32_ints.h>
-extern void ip32_be_init(void);
-extern void crime_init(void);
+#include "ip32-common.h"
#ifdef CONFIG_SGI_O2MACE_ETH
/*
diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c
index 5ae30b78d38d..d9249f5a632e 100644
--- a/arch/mips/txx9/generic/pci.c
+++ b/arch/mips/txx9/generic/pci.c
@@ -348,7 +348,7 @@ static void final_fixup(struct pci_dev *dev)
unsigned char bist;
int ret;
- /* Do build-in self test */
+ /* Do built-in self test */
ret = pci_read_config_byte(dev, PCI_BIST, &bist);
if ((ret != PCIBIOS_SUCCESSFUL) || !(bist & PCI_BIST_CAPABLE))
return;
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 348cea097792..81484a776b33 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -38,6 +38,7 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page,
#define flush_icache_pages flush_icache_pages
#define flush_cache_vmap(start, end) flush_dcache_range(start, end)
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) flush_dcache_range(start, end)
extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index b4006f2a9705..ba4c05bc24d6 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -41,6 +41,7 @@ void flush_kernel_vmap_range(void *vaddr, int size);
void invalidate_kernel_vmap_range(void *vaddr, int size);
#define flush_cache_vmap(start, end) flush_cache_all()
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) flush_cache_all()
void flush_dcache_folio(struct folio *folio);
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 904ca3b9e7a7..c69f6d5946e9 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -123,10 +123,10 @@ static unsigned long f_extend(unsigned long address)
#ifdef CONFIG_64BIT
if(unlikely(parisc_narrow_firmware)) {
if((address & 0xff000000) == 0xf0000000)
- return 0xf0f0f0f000000000UL | (u32)address;
+ return (0xfffffff0UL << 32) | (u32)address;
if((address & 0xf0000000) == 0xf0000000)
- return 0xffffffff00000000UL | (u32)address;
+ return (0xffffffffUL << 32) | (u32)address;
}
#endif
return address;
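Worked arithmetic for the constant change above, using a hypothetical narrow-firmware address 0xf0001234 purely for illustration:

	(0xfffffff0UL << 32) | 0xf0001234   == 0xfffffff0f0001234   /* new */
	0xf0f0f0f000000000UL | 0xf0001234   == 0xf0f0f0f0f0001234   /* old */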
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 414b978b8010..b9fc064d38d2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -859,6 +859,7 @@ config THREAD_SHIFT
int "Thread shift" if EXPERT
range 13 15
default "15" if PPC_256K_PAGES
+ default "15" if PPC_PSERIES || PPC_POWERNV
default "14" if PPC64
default "13"
help
diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
index ccb2034506f0..d841a97010a0 100644
--- a/arch/powerpc/include/asm/hvconsole.h
+++ b/arch/powerpc/include/asm/hvconsole.h
@@ -21,8 +21,8 @@
* Vio firmware always attempts to fetch MAX_VIO_GET_CHARS chars. The 'count'
* parm is included to conform to put_chars() function pointer template
*/
-extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
-extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
+extern ssize_t hvc_get_chars(uint32_t vtermno, u8 *buf, size_t count);
+extern ssize_t hvc_put_chars(uint32_t vtermno, const u8 *buf, size_t count);
/* Provided by HVC VIO */
void hvc_vio_init_early(void);
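With the prototype change, a console backend wired to these hooks is declared with ssize_t/size_t/u8 throughout; a hypothetical backend (names are illustrative only, not part of the patch) would look like:

	static ssize_t my_get_chars(uint32_t vtermno, u8 *buf, size_t count);
	static ssize_t my_put_chars(uint32_t vtermno, const u8 *buf, size_t count);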
diff --git a/arch/powerpc/include/asm/hvsi.h b/arch/powerpc/include/asm/hvsi.h
index 464a7519ed64..9058edcb632b 100644
--- a/arch/powerpc/include/asm/hvsi.h
+++ b/arch/powerpc/include/asm/hvsi.h
@@ -64,7 +64,7 @@ struct hvsi_priv {
unsigned int inbuf_len; /* data in input buffer */
unsigned char inbuf[HVSI_INBUF_SIZE];
unsigned int inbuf_cur; /* Cursor in input buffer */
- unsigned int inbuf_pktlen; /* packet length from cursor */
+ size_t inbuf_pktlen; /* packet length from cursor */
atomic_t seqno; /* packet sequence number */
unsigned int opened:1; /* driver opened */
unsigned int established:1; /* protocol established */
@@ -72,24 +72,26 @@ struct hvsi_priv {
unsigned int mctrl_update:1; /* modem control updated */
unsigned short mctrl; /* modem control */
struct tty_struct *tty; /* tty structure */
- int (*get_chars)(uint32_t termno, char *buf, int count);
- int (*put_chars)(uint32_t termno, const char *buf, int count);
+ ssize_t (*get_chars)(uint32_t termno, u8 *buf, size_t count);
+ ssize_t (*put_chars)(uint32_t termno, const u8 *buf, size_t count);
uint32_t termno;
};
/* hvsi lib functions */
struct hvc_struct;
extern void hvsilib_init(struct hvsi_priv *pv,
- int (*get_chars)(uint32_t termno, char *buf, int count),
- int (*put_chars)(uint32_t termno, const char *buf,
- int count),
+ ssize_t (*get_chars)(uint32_t termno, u8 *buf,
+ size_t count),
+ ssize_t (*put_chars)(uint32_t termno, const u8 *buf,
+ size_t count),
int termno, int is_console);
extern int hvsilib_open(struct hvsi_priv *pv, struct hvc_struct *hp);
extern void hvsilib_close(struct hvsi_priv *pv, struct hvc_struct *hp);
extern int hvsilib_read_mctrl(struct hvsi_priv *pv);
extern int hvsilib_write_mctrl(struct hvsi_priv *pv, int dtr);
extern void hvsilib_establish(struct hvsi_priv *pv);
-extern int hvsilib_get_chars(struct hvsi_priv *pv, char *buf, int count);
-extern int hvsilib_put_chars(struct hvsi_priv *pv, const char *buf, int count);
+extern ssize_t hvsilib_get_chars(struct hvsi_priv *pv, u8 *buf, size_t count);
+extern ssize_t hvsilib_put_chars(struct hvsi_priv *pv, const u8 *buf,
+ size_t count);
#endif /* _HVSI_H */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 8799b37be295..8abac532146e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -63,8 +63,6 @@
#include <linux/mmu_notifier.h>
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
#define HPTEG_HASH_BITS_PTE_LONG 12
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index b66b0c615f4f..af304e6cb486 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -313,9 +313,11 @@ extern int early_init_dt_scan_recoverable_ranges(unsigned long node,
const char *uname, int depth, void *data);
void __init opal_configure_cores(void);
-extern int opal_get_chars(uint32_t vtermno, char *buf, int count);
-extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
-extern int opal_put_chars_atomic(uint32_t vtermno, const char *buf, int total_len);
+extern ssize_t opal_get_chars(uint32_t vtermno, u8 *buf, size_t count);
+extern ssize_t opal_put_chars(uint32_t vtermno, const u8 *buf,
+ size_t total_len);
+extern ssize_t opal_put_chars_atomic(uint32_t vtermno, const u8 *buf,
+ size_t total_len);
extern int opal_flush_chars(uint32_t vtermno, bool wait);
extern int opal_flush_console(uint32_t vtermno);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 902611954200..074263429faf 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -19,13 +19,11 @@ if VIRTUALIZATION
config KVM
bool
- select PREEMPT_NOTIFIERS
- select HAVE_KVM_EVENTFD
+ select KVM_COMMON
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_VFIO
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
- select INTERVAL_TREE
config KVM_BOOK3S_HANDLER
bool
@@ -42,7 +40,7 @@ config KVM_BOOK3S_64_HANDLER
config KVM_BOOK3S_PR_POSSIBLE
bool
select KVM_MMIO
- select MMU_NOTIFIER
+ select KVM_GENERIC_MMU_NOTIFIER
config KVM_BOOK3S_HV_POSSIBLE
bool
@@ -85,7 +83,7 @@ config KVM_BOOK3S_64_HV
tristate "KVM for POWER7 and later using hypervisor mode in host"
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
- select MMU_NOTIFIER
+ select KVM_GENERIC_MMU_NOTIFIER
select CMA
help
Support running unmodified book3s_64 guest kernels in
@@ -194,7 +192,7 @@ config KVM_E500V2
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
- select MMU_NOTIFIER
+ select KVM_GENERIC_MMU_NOTIFIER
help
Support running unmodified E500 guest kernels in virtual machines on
E500v2 host processors.
@@ -211,7 +209,7 @@ config KVM_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
- select MMU_NOTIFIER
+ select KVM_GENERIC_MMU_NOTIFIER
help
Support running unmodified E500MC/E5500/E6500 guest kernels in
virtual machines on E500MC/E5500/E6500 host processors.
@@ -225,7 +223,6 @@ config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && PPC_E500
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
help
@@ -238,7 +235,6 @@ config KVM_XICS
bool "KVM in-kernel XICS emulation"
depends on KVM_BOOK3S_64 && !KVM_MPIC
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
default y
help
Include support for the XICS (eXternal Interrupt Controller
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e48126a59ba7..52427fc2a33f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -6240,7 +6240,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
}
srcu_idx = srcu_read_lock(&kvm->srcu);
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
struct kvm_memory_slot *memslot;
struct kvm_memslots *slots = __kvm_memslots(kvm, i);
int bkt;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f6af752698d0..23407fbd73c9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -528,7 +528,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
@@ -578,7 +577,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
-#ifdef CONFIG_HAVE_KVM_IRQFD
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
case KVM_CAP_IRQFD_RESAMPLE:
r = !xive_enabled();
break;
@@ -632,13 +631,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
case KVM_CAP_SYNC_MMU:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- r = hv_enabled;
-#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER));
r = 1;
-#else
- r = 0;
-#endif
break;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_HTAB_FD:
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cdf3838f08d3..45dd77e3ccf6 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -424,7 +424,7 @@ static int __init opal_message_init(struct device_node *opal_node)
return 0;
}
-int opal_get_chars(uint32_t vtermno, char *buf, int count)
+ssize_t opal_get_chars(uint32_t vtermno, u8 *buf, size_t count)
{
s64 rc;
__be64 evt, len;
@@ -441,10 +441,11 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count)
return 0;
}
-static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
+static ssize_t __opal_put_chars(uint32_t vtermno, const u8 *data,
+ size_t total_len, bool atomic)
{
unsigned long flags = 0 /* shut up gcc */;
- int written;
+ ssize_t written;
__be64 olen;
s64 rc;
@@ -484,7 +485,7 @@ static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, b
if (atomic) {
/* Should not happen */
pr_warn("atomic console write returned partial "
- "len=%d written=%d\n", total_len, written);
+ "len=%zu written=%zd\n", total_len, written);
}
if (!written)
written = -EAGAIN;
@@ -497,7 +498,7 @@ out:
return written;
}
-int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
+ssize_t opal_put_chars(uint32_t vtermno, const u8 *data, size_t total_len)
{
return __opal_put_chars(vtermno, data, total_len, false);
}
@@ -508,7 +509,8 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
* true at the moment because console space can race with OPAL's console
* writes.
*/
-int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
+ssize_t opal_put_chars_atomic(uint32_t vtermno, const u8 *data,
+ size_t total_len)
{
return __opal_put_chars(vtermno, data, total_len, true);
}
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index 1ac52963e08b..8803c947998e 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -25,7 +25,7 @@
* firmware.
* @count: not used?
*/
-int hvc_get_chars(uint32_t vtermno, char *buf, int count)
+ssize_t hvc_get_chars(uint32_t vtermno, u8 *buf, size_t count)
{
long ret;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(hvc_get_chars);
* firmware. Must be at least 16 bytes, even if count is less than 16.
* @count: Send this number of characters.
*/
-int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+ssize_t hvc_put_chars(uint32_t vtermno, const u8 *buf, size_t count)
{
unsigned long *lbuf = (unsigned long *) buf;
long ret;
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 3868483fbe29..ef7707ea0db7 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -54,7 +54,7 @@ static void quirk_fsl_pcie_early(struct pci_dev *dev)
/* if we aren't in host mode don't bother */
pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type);
- if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+ if ((hdr_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
return;
dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
@@ -581,7 +581,7 @@ static int fsl_add_bridge(struct platform_device *pdev, int is_primary)
hose->ops = &fsl_indirect_pcie_ops;
/* For PCIE read HEADER_TYPE to identify controller mode */
early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type);
- if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+ if ((hdr_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
goto no_bridge;
} else {
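The named constant reads the same as the open-coded 0x7f it replaces; PCI_HEADER_TYPE_MASK strips bit 7 (the multi-function flag) before the header type is compared. A minimal sketch of the check:

	u8 hdr_type;

	pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type);
	if ((hdr_type & PCI_HEADER_TYPE_MASK) != PCI_HEADER_TYPE_BRIDGE)
		return;		/* not a bridge, nothing to fix up */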
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cd4c9a204d08..bffbd869a068 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -53,24 +53,28 @@ config RISCV
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USES_CFI_TRAPS if CFI_CLANG
+ select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
+ select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
select CLINT_TIMER if !MMU
select CLONE_BACKWARDS
select COMMON_CLK
- select CPU_PM if CPU_IDLE || HIBERNATION
+ select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
select EDAC_SUPPORT
select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
+ select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
select GENERIC_ARCH_TOPOLOGY
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_CPU_DEVICES
select GENERIC_EARLY_IOREMAP
select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
@@ -113,6 +117,7 @@ config RISCV
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
+ select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER
@@ -140,6 +145,8 @@ config RISCV
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RETHOOK if !XIP_KERNEL
select HAVE_RSEQ
+ select HAVE_SAMPLE_FTRACE_DIRECT
+ select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
@@ -181,6 +188,20 @@ config HAVE_SHADOW_CALL_STACK
# https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769
depends on $(ld-option,--no-relax-gp)
+config RISCV_USE_LINKER_RELAXATION
+ def_bool y
+ # https://github.com/llvm/llvm-project/commit/6611d58f5bbcbec77262d392e2923e1d680f6985
+ depends on !LD_IS_LLD || LLD_VERSION >= 150000
+
+# https://github.com/llvm/llvm-project/commit/bbc0f99f3bc96f1db16f649fc21dd18e5b0918f6
+config ARCH_HAS_BROKEN_DWARF5
+ def_bool y
+ depends on RISCV_USE_LINKER_RELAXATION
+ # https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a
+ depends on AS_IS_LLVM && AS_VERSION < 180000
+ # https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77
+ depends on LD_IS_LLD && LLD_VERSION < 180000
+
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
default 8
@@ -414,7 +435,9 @@ config NUMA
depends on SMP && MMU
select ARCH_SUPPORTS_NUMA_BALANCING
select GENERIC_ARCH_NUMA
+ select HAVE_SETUP_PER_CPU_AREA
select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK
select OF_NUMA
select USE_PERCPU_NUMA_NODE_ID
help
@@ -525,6 +548,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE
If you don't know what to do here, say Y.
+config RISCV_ISA_V_UCOPY_THRESHOLD
+ int "Threshold size for vectorized user copies"
+ depends on RISCV_ISA_V
+ default 768
+ help
+ Prefer using vectorized copy_to_user()/copy_from_user() when the
+ workload size exceeds this value.
+
+config RISCV_ISA_V_PREEMPTIVE
+ bool "Run kernel-mode Vector with kernel preemption"
+ depends on PREEMPTION
+ depends on RISCV_ISA_V
+ default y
+ help
+ Usually, in-kernel SIMD routines are run with preemption disabled.
+ Functions which invoke long-running SIMD thus must yield the core's
+ vector unit to prevent blocking other tasks for too long.
+
+ This config allows the kernel to run SIMD without explicitly disabling
+ preemption. Enabling this config will result in higher memory
+ consumption due to the allocation of per-task kernel Vector contexts.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
@@ -651,6 +696,20 @@ config RISCV_MISALIGNED
load/store for both kernel and userspace. When disable, misaligned
accesses will generate SIGBUS in userspace and panic in kernel.
+config RISCV_EFFICIENT_UNALIGNED_ACCESS
+ bool "Assume the CPU supports fast unaligned memory accesses"
+ depends on NONPORTABLE
+ select DCACHE_WORD_ACCESS if MMU
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ help
+ Say Y here if you want the kernel to assume that the CPU supports
+ efficient unaligned memory accesses. When enabled, this option
+ improves the performance of the kernel on such CPUs. However, the
+ kernel will run much more slowly, or will not be able to run at all,
+ on CPUs that do not support efficient unaligned memory accesses.
+
+ If unsure what to do here, say N.
+
endmenu # "Platform type"
menu "Kernel features"
@@ -722,6 +781,25 @@ config COMPAT
If you want to execute 32-bit userspace applications, say Y.
+config PARAVIRT
+ bool "Enable paravirtualization code"
+ depends on RISCV_SBI
+ help
+ This changes the kernel so it can modify itself when it is run
+ under a hypervisor, potentially improving performance significantly
+ over full virtualization.
+
+config PARAVIRT_TIME_ACCOUNTING
+ bool "Paravirtual steal time accounting"
+ depends on PARAVIRT
+ help
+ Select this option to enable fine granularity task steal time
+ accounting. Time spent executing other tasks in parallel with
+ the current vCPU is discounted from the vCPU power. To account for
+ that, there can be a small performance impact.
+
+ If in doubt, say N here.
+
config RELOCATABLE
bool "Build a relocatable kernel"
depends on MMU && 64BIT && !XIP_KERNEL
@@ -902,13 +980,13 @@ config RISCV_ISA_FALLBACK
on the replacement properties, "riscv,isa-base" and
"riscv,isa-extensions".
-endmenu # "Boot options"
-
config BUILTIN_DTB
- bool
+ bool "Built-in device tree"
depends on OF && NONPORTABLE
default y if XIP_KERNEL
+endmenu # "Boot options"
+
config PORTABLE
bool
default !NONPORTABLE
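As a usage note for the vector options above, a hedged sketch of a kernel-mode vector region (this assumes the riscv kernel_vector_begin()/kernel_vector_end() API that accompanies these options):

	kernel_vector_begin();
	/* vectorized work, e.g. the user-copy path gated by
	 * RISCV_ISA_V_UCOPY_THRESHOLD */
	kernel_vector_end();

With RISCV_ISA_V_PREEMPTIVE=y this region may be preempted, because each task carries its own kernel Vector context; otherwise preemption stays disabled for its duration.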
diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index f5c432b005e7..910ba8837add 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -98,6 +98,7 @@ config ERRATA_THEAD_CMO
depends on ERRATA_THEAD && MMU
select DMA_DIRECT_REMAP
select RISCV_DMA_NONCOHERENT
+ select RISCV_NONSTANDARD_CACHE_OPS
default y
help
This will apply the cache management errata to handle the
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index a74be78678eb..0b7d109258e7 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -43,8 +43,7 @@ else
KBUILD_LDFLAGS += -melf32lriscv
endif
-ifeq ($(CONFIG_LD_IS_LLD),y)
-ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y)
+ifndef CONFIG_RISCV_USE_LINKER_RELAXATION
KBUILD_CFLAGS += -mno-relax
KBUILD_AFLAGS += -mno-relax
ifndef CONFIG_AS_IS_LLVM
@@ -52,7 +51,6 @@ ifndef CONFIG_AS_IS_LLVM
KBUILD_AFLAGS += -Wa,-mno-relax
endif
endif
-endif
ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
KBUILD_LDFLAGS += --no-relax-gp
@@ -108,7 +106,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
# unaligned accesses. While unaligned accesses are explicitly allowed in the
# RISC-V ISA, they're emulated by machine mode traps on all extant
# architectures. It's faster to have GCC emit only aligned accesses.
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y)
KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+endif
ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
prepare: stack_protector_prepare
@@ -163,6 +163,8 @@ BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi
all: $(notdir $(KBUILD_IMAGE))
+loader.bin: loader
+Image.gz loader vmlinuz.efi: Image
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
@$(kecho) ' Kernel: $(boot)/$@ is ready'
diff --git a/arch/riscv/boot/dts/sophgo/sg2042.dtsi b/arch/riscv/boot/dts/sophgo/sg2042.dtsi
index 93256540d078..ead1cc35d88b 100644
--- a/arch/riscv/boot/dts/sophgo/sg2042.dtsi
+++ b/arch/riscv/boot/dts/sophgo/sg2042.dtsi
@@ -93,144 +93,160 @@
<&cpu63_intc 3>;
};
- clint_mtimer0: timer@70ac000000 {
+ clint_mtimer0: timer@70ac004000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac000000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac004000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu0_intc 7>,
<&cpu1_intc 7>,
<&cpu2_intc 7>,
<&cpu3_intc 7>;
};
- clint_mtimer1: timer@70ac010000 {
+ clint_mtimer1: timer@70ac014000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac010000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac014000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu4_intc 7>,
<&cpu5_intc 7>,
<&cpu6_intc 7>,
<&cpu7_intc 7>;
};
- clint_mtimer2: timer@70ac020000 {
+ clint_mtimer2: timer@70ac024000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac020000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac024000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu8_intc 7>,
<&cpu9_intc 7>,
<&cpu10_intc 7>,
<&cpu11_intc 7>;
};
- clint_mtimer3: timer@70ac030000 {
+ clint_mtimer3: timer@70ac034000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac030000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac034000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu12_intc 7>,
<&cpu13_intc 7>,
<&cpu14_intc 7>,
<&cpu15_intc 7>;
};
- clint_mtimer4: timer@70ac040000 {
+ clint_mtimer4: timer@70ac044000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac040000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac044000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu16_intc 7>,
<&cpu17_intc 7>,
<&cpu18_intc 7>,
<&cpu19_intc 7>;
};
- clint_mtimer5: timer@70ac050000 {
+ clint_mtimer5: timer@70ac054000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac050000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac054000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu20_intc 7>,
<&cpu21_intc 7>,
<&cpu22_intc 7>,
<&cpu23_intc 7>;
};
- clint_mtimer6: timer@70ac060000 {
+ clint_mtimer6: timer@70ac064000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac060000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac064000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu24_intc 7>,
<&cpu25_intc 7>,
<&cpu26_intc 7>,
<&cpu27_intc 7>;
};
- clint_mtimer7: timer@70ac070000 {
+ clint_mtimer7: timer@70ac074000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac070000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac074000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu28_intc 7>,
<&cpu29_intc 7>,
<&cpu30_intc 7>,
<&cpu31_intc 7>;
};
- clint_mtimer8: timer@70ac080000 {
+ clint_mtimer8: timer@70ac084000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac080000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac084000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu32_intc 7>,
<&cpu33_intc 7>,
<&cpu34_intc 7>,
<&cpu35_intc 7>;
};
- clint_mtimer9: timer@70ac090000 {
+ clint_mtimer9: timer@70ac094000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac090000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac094000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu36_intc 7>,
<&cpu37_intc 7>,
<&cpu38_intc 7>,
<&cpu39_intc 7>;
};
- clint_mtimer10: timer@70ac0a0000 {
+ clint_mtimer10: timer@70ac0a4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0a0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0a4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu40_intc 7>,
<&cpu41_intc 7>,
<&cpu42_intc 7>,
<&cpu43_intc 7>;
};
- clint_mtimer11: timer@70ac0b0000 {
+ clint_mtimer11: timer@70ac0b4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0b0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0b4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu44_intc 7>,
<&cpu45_intc 7>,
<&cpu46_intc 7>,
<&cpu47_intc 7>;
};
- clint_mtimer12: timer@70ac0c0000 {
+ clint_mtimer12: timer@70ac0c4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0c0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0c4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu48_intc 7>,
<&cpu49_intc 7>,
<&cpu50_intc 7>,
<&cpu51_intc 7>;
};
- clint_mtimer13: timer@70ac0d0000 {
+ clint_mtimer13: timer@70ac0d4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0d0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0d4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu52_intc 7>,
<&cpu53_intc 7>,
<&cpu54_intc 7>,
<&cpu55_intc 7>;
};
- clint_mtimer14: timer@70ac0e0000 {
+ clint_mtimer14: timer@70ac0e4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0e0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0e4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu56_intc 7>,
<&cpu57_intc 7>,
<&cpu58_intc 7>,
<&cpu59_intc 7>;
};
- clint_mtimer15: timer@70ac0f0000 {
+ clint_mtimer15: timer@70ac0f4000 {
compatible = "sophgo,sg2042-aclint-mtimer", "thead,c900-aclint-mtimer";
- reg = <0x00000070 0xac0f0000 0x00000000 0x00007ff8>;
+ reg = <0x00000070 0xac0f4000 0x00000000 0x0000c000>;
+ reg-names = "mtimecmp";
interrupts-extended = <&cpu60_intc 7>,
<&cpu61_intc 7>,
<&cpu62_intc 7>,
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 905881282a7c..eaf34e871e30 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -149,6 +149,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
deleted file mode 100644
index 89b601e253a6..000000000000
--- a/arch/riscv/configs/rv32_defconfig
+++ /dev/null
@@ -1,139 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_NO_HZ_IDLE=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BPF_SYSCALL=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_CFS_BANDWIDTH=y
-CONFIG_CGROUP_BPF=y
-CONFIG_NAMESPACES=y
-CONFIG_USER_NS=y
-CONFIG_CHECKPOINT_RESTORE=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_PROFILING=y
-CONFIG_SOC_SIFIVE=y
-CONFIG_SOC_VIRT=y
-CONFIG_NONPORTABLE=y
-CONFIG_ARCH_RV32I=y
-CONFIG_SMP=y
-CONFIG_HOTPLUG_CPU=y
-CONFIG_PM=y
-CONFIG_CPU_IDLE=y
-CONFIG_VIRTUALIZATION=y
-CONFIG_KVM=m
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NETLINK_DIAG=y
-CONFIG_NET_9P=y
-CONFIG_NET_9P_VIRTIO=y
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-CONFIG_PCI_HOST_GENERIC=y
-CONFIG_PCIE_XILINX=y
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_SCSI_VIRTIO=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_SATA_AHCI_PLATFORM=y
-CONFIG_NETDEVICES=y
-CONFIG_VIRTIO_NET=y
-CONFIG_MACB=y
-CONFIG_E1000E=y
-CONFIG_R8169=y
-CONFIG_MICROSEMI_PHY=y
-CONFIG_INPUT_MOUSEDEV=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_VIRTIO_CONSOLE=y
-CONFIG_HW_RANDOM=y
-CONFIG_HW_RANDOM_VIRTIO=y
-CONFIG_SPI=y
-CONFIG_SPI_SIFIVE=y
-# CONFIG_PTP_1588_CLOCK is not set
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
-CONFIG_DRM_VIRTIO_GPU=y
-CONFIG_FB=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_USB=y
-CONFIG_USB_XHCI_HCD=y
-CONFIG_USB_XHCI_PLATFORM=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_OHCI_HCD_PLATFORM=y
-CONFIG_USB_STORAGE=y
-CONFIG_USB_UAS=y
-CONFIG_MMC=y
-CONFIG_MMC_SPI=y
-CONFIG_RTC_CLASS=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_INPUT=y
-CONFIG_VIRTIO_MMIO=y
-CONFIG_RPMSG_CHAR=y
-CONFIG_RPMSG_CTRL=y
-CONFIG_RPMSG_VIRTIO=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_AUTOFS_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_HUGETLBFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_NFS_V4_1=y
-CONFIG_NFS_V4_2=y
-CONFIG_ROOT_NFS=y
-CONFIG_9P_FS=y
-CONFIG_CRYPTO_USER_API_HASH=y
-CONFIG_CRYPTO_DEV_VIRTIO=y
-CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_SCHED_STACK_END_CHECK=y
-CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_VM_PGFLAGS=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_DEBUG_PER_CPU_MAPS=y
-CONFIG_SOFTLOCKUP_DETECTOR=y
-CONFIG_WQ_WATCHDOG=y
-CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_RWSEMS=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_STACKTRACE=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_PLIST=y
-CONFIG_DEBUG_SG=y
-# CONFIG_RCU_TRACE is not set
-CONFIG_RCU_EQS_DEBUG=y
-# CONFIG_FTRACE is not set
-# CONFIG_RUNTIME_TESTING_MENU is not set
-CONFIG_MEMTEST=y
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 0554ed4bf087..b1c410bbc1ae 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -12,8 +12,10 @@
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
+#include <asm/dma-noncoherent.h>
#include <asm/errata_list.h>
#include <asm/hwprobe.h>
+#include <asm/io.h>
#include <asm/patch.h>
#include <asm/vendorid_list.h>
@@ -33,6 +35,69 @@ static bool errata_probe_pbmt(unsigned int stage,
return false;
}
+/*
+ * th.dcache.ipa rs1 (invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ * 0000001 01010 rs1 000 00000 0001011
+ * th.dcache.iva rs1 (invalidate, virtual address)
+ * 0000001 00110 rs1 000 00000 0001011
+ *
+ * th.dcache.cpa rs1 (clean, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ * 0000001 01001 rs1 000 00000 0001011
+ * th.dcache.cva rs1 (clean, virtual address)
+ * 0000001 00101 rs1 000 00000 0001011
+ *
+ * th.dcache.cipa rs1 (clean then invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ * 0000001 01011 rs1 000 00000 0001011
+ * th.dcache.civa rs1 (clean then invalidate, virtual address)
+ * 0000001 00111 rs1 000 00000 0001011
+ *
+ * th.sync.s (make sure all cache operations finished)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ * 0000000 11001 00000 000 00000 0001011
+ */
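To make the table above concrete, here is how the first encoding below decodes (editor's worked example, derived only from the bit layout documented in this comment):

/*
 * THEAD_INVAL_A0 = 0x02a5000b
 *                = 0000001 01010 01010 000 00000 0001011
 *                  31-25   24-20 19-15 14-12 11-7 6-0
 * bits 24-20 = 01010 select th.dcache.ipa, bits 19-15 = 01010 are x10 (a0),
 * so the word is "th.dcache.ipa a0".
 */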
+#define THEAD_INVAL_A0 ".long 0x02a5000b"
+#define THEAD_CLEAN_A0 ".long 0x0295000b"
+#define THEAD_FLUSH_A0 ".long 0x02b5000b"
+#define THEAD_SYNC_S ".long 0x0190000b"
+
+#define THEAD_CMO_OP(_op, _start, _size, _cachesize) \
+asm volatile("mv a0, %1\n\t" \
+ "j 2f\n\t" \
+ "3:\n\t" \
+ THEAD_##_op##_A0 "\n\t" \
+ "add a0, a0, %0\n\t" \
+ "2:\n\t" \
+ "bltu a0, %2, 3b\n\t" \
+ THEAD_SYNC_S \
+ : : "r"(_cachesize), \
+ "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \
+ "r"((unsigned long)(_start) + (_size)) \
+ : "a0")
+
+static void thead_errata_cache_inv(phys_addr_t paddr, size_t size)
+{
+ THEAD_CMO_OP(INVAL, paddr, size, riscv_cbom_block_size);
+}
+
+static void thead_errata_cache_wback(phys_addr_t paddr, size_t size)
+{
+ THEAD_CMO_OP(CLEAN, paddr, size, riscv_cbom_block_size);
+}
+
+static void thead_errata_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+ THEAD_CMO_OP(FLUSH, paddr, size, riscv_cbom_block_size);
+}
+
+static const struct riscv_nonstd_cache_ops thead_errata_cmo_ops = {
+ .wback = &thead_errata_cache_wback,
+ .inv = &thead_errata_cache_inv,
+ .wback_inv = &thead_errata_cache_wback_inv,
+};
+
static bool errata_probe_cmo(unsigned int stage,
unsigned long arch_id, unsigned long impid)
{
@@ -48,6 +113,7 @@ static bool errata_probe_cmo(unsigned int stage,
if (stage == RISCV_ALTERNATIVES_BOOT) {
riscv_cbom_block_size = L1_CACHE_BYTES;
riscv_noncoherent_supported();
+ riscv_noncoherent_register_cache_ops(&thead_errata_cmo_ops);
}
return true;
@@ -77,8 +143,7 @@ static u32 thead_errata_probe(unsigned int stage,
if (errata_probe_pbmt(stage, archid, impid))
cpu_req_errata |= BIT(ERRATA_THEAD_PBMT);
- if (errata_probe_cmo(stage, archid, impid))
- cpu_req_errata |= BIT(ERRATA_THEAD_CMO);
+ errata_probe_cmo(stage, archid, impid);
if (errata_probe_pmu(stage, archid, impid))
cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..c20236a0725b
--- /dev/null
+++ b/arch/riscv/include/asm/arch_hweight.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Based on arch/x86/include/asm/arch_hweight.h
+ */
+
+#ifndef _ASM_RISCV_HWEIGHT_H
+#define _ASM_RISCV_HWEIGHT_H
+
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if (BITS_PER_LONG == 64)
+#define CPOPW "cpopw "
+#elif (BITS_PER_LONG == 32)
+#define CPOPW "cpop "
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
+{
+#ifdef CONFIG_RISCV_ISA_ZBB
+ asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ RISCV_ISA_EXT_ZBB, 1)
+ : : : : legacy);
+
+ asm (".option push\n"
+ ".option arch,+zbb\n"
+ CPOPW "%0, %0\n"
+ ".option pop\n"
+ : "+r" (w) : :);
+
+ return w;
+
+legacy:
+#endif
+ return __sw_hweight32(w);
+}
+
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+ return __arch_hweight32(w & 0xffff);
+}
+
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+ return __arch_hweight32(w & 0xff);
+}
+
+#if BITS_PER_LONG == 64
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+# ifdef CONFIG_RISCV_ISA_ZBB
+ asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ RISCV_ISA_EXT_ZBB, 1)
+ : : : : legacy);
+
+ asm (".option push\n"
+ ".option arch,+zbb\n"
+ "cpop %0, %0\n"
+ ".option pop\n"
+ : "+r" (w) : :);
+
+ return w;
+
+legacy:
+# endif
+ return __sw_hweight64(w);
+}
+#else /* BITS_PER_LONG == 64 */
+static inline unsigned long __arch_hweight64(__u64 w)
+{
+ return __arch_hweight32((u32)w) +
+ __arch_hweight32((u32)(w >> 32));
+}
+#endif /* !(BITS_PER_LONG == 64) */
+
+#endif /* _ASM_RISCV_HWEIGHT_H */
diff --git a/arch/riscv/include/asm/archrandom.h b/arch/riscv/include/asm/archrandom.h
new file mode 100644
index 000000000000..5345360adfb9
--- /dev/null
+++ b/arch/riscv/include/asm/archrandom.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel interface for the RISCV arch_random_* functions
+ *
+ * Copyright (c) 2023 Rivos Inc.
+ *
+ */
+
+#ifndef ASM_RISCV_ARCHRANDOM_H
+#define ASM_RISCV_ARCHRANDOM_H
+
+#include <asm/csr.h>
+#include <asm/processor.h>
+
+#define SEED_RETRY_LOOPS 100
+
+static inline bool __must_check csr_seed_long(unsigned long *v)
+{
+ unsigned int retry = SEED_RETRY_LOOPS, valid_seeds = 0;
+ const int needed_seeds = sizeof(long) / sizeof(u16);
+ u16 *entropy = (u16 *)v;
+
+ do {
+ /*
+ * The SEED CSR must be accessed with a read-write instruction.
+ */
+ unsigned long csr_seed = csr_swap(CSR_SEED, 0);
+ unsigned long opst = csr_seed & SEED_OPST_MASK;
+
+ switch (opst) {
+ case SEED_OPST_ES16:
+ entropy[valid_seeds++] = csr_seed & SEED_ENTROPY_MASK;
+ if (valid_seeds == needed_seeds)
+ return true;
+ break;
+
+ case SEED_OPST_DEAD:
+ pr_err_once("archrandom: Unrecoverable error\n");
+ return false;
+
+ case SEED_OPST_BIST:
+ case SEED_OPST_WAIT:
+ default:
+ cpu_relax();
+ continue;
+ }
+ } while (--retry);
+
+ return false;
+}
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+ return 0;
+}
+
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+ if (!max_longs)
+ return 0;
+
+ /*
+ * If Zkr is supported and csr_seed_long succeeds, we return one long
+ * worth of entropy.
+ */
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZKR) && csr_seed_long(v))
+ return 1;
+
+ return 0;
+}
+
+#endif /* ASM_RISCV_ARCHRANDOM_H */
diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
index 00a96e7a9664..0c8bfd54fc4e 100644
--- a/arch/riscv/include/asm/asm-extable.h
+++ b/arch/riscv/include/asm/asm-extable.h
@@ -6,6 +6,7 @@
#define EX_TYPE_FIXUP 1
#define EX_TYPE_BPF 2
#define EX_TYPE_UACCESS_ERR_ZERO 3
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
#ifdef CONFIG_MMU
@@ -47,6 +48,11 @@
#define EX_DATA_REG_ZERO_SHIFT 5
#define EX_DATA_REG_ZERO GENMASK(9, 5)
+#define EX_DATA_REG_DATA_SHIFT 0
+#define EX_DATA_REG_DATA GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT 5
+#define EX_DATA_REG_ADDR GENMASK(9, 5)
+
#define EX_DATA_REG(reg, gpr) \
"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
@@ -62,6 +68,15 @@
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
+#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
+ "(" \
+ EX_DATA_REG(DATA, data) " | " \
+ EX_DATA_REG(ADDR, addr) \
+ ")")
+
#endif /* __ASSEMBLY__ */
#else /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index 36b955c762ba..cd627ec289f1 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);
+#ifdef CONFIG_RISCV_ISA_V
+
+#ifdef CONFIG_MMU
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
+#endif /* CONFIG_MMU */
+
+void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2);
+void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3);
+void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4);
+void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4,
+ const unsigned long *__restrict p5);
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs);
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs);
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+#endif /* CONFIG_RISCV_ISA_V */
#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 224b4dc02b50..9ffc35537024 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -271,7 +271,9 @@ legacy:
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/hweight.h>
+#include <asm/arch_hweight.h>
+
+#include <asm-generic/bitops/const_hweight.h>
#if (BITS_PER_LONG == 64)
#define __AMO(op) "amo" #op ".d"
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 3cb53c4df27c..a129dac4521d 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -37,7 +37,8 @@ static inline void flush_dcache_page(struct page *page)
flush_icache_mm(vma->vm_mm, 0)
#ifdef CONFIG_64BIT
-#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end)
+#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end)
+#define flush_cache_vmap_early(start, end) local_flush_tlb_kernel_range(start, end)
#endif
#ifndef CONFIG_SMP
diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
new file mode 100644
index 000000000000..a5b60b54b101
--- /dev/null
+++ b/arch/riscv/include/asm/checksum.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Checksum routines
+ *
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#ifndef __ASM_RISCV_CHECKSUM_H
+#define __ASM_RISCV_CHECKSUM_H
+
+#include <linux/in6.h>
+#include <linux/uaccess.h>
+
+#define ip_fast_csum ip_fast_csum
+
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+/* Default version is sufficient for 32 bit */
+#ifndef CONFIG_32BIT
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
+#endif
+
+/* Define riscv versions of functions before importing asm-generic/checksum.h */
+#include <asm-generic/checksum.h>
+
+/**
+ * Quickly compute an IP checksum with the assumption that IPv4 headers will
+ * always be in multiples of 32 bits and have an ihl of at least 5.
+ *
+ * @ihl: the number of 32-bit segments; must be greater than or equal to 5.
+ * @iph: assumed to be word aligned given that NET_IP_ALIGN is set to 2 on
+ * riscv, defining IP headers to be aligned.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ unsigned long csum = 0;
+ int pos = 0;
+
+ do {
+ csum += ((const unsigned int *)iph)[pos];
+ if (IS_ENABLED(CONFIG_32BIT))
+ csum += csum < ((const unsigned int *)iph)[pos];
+ } while (++pos < ihl);
+
+ /*
+ * ZBB only saves three instructions on 32-bit and five on 64-bit, so it is
+ * not worth checking for support without Alternatives.
+ */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+ IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ unsigned long fold_temp;
+
+ asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ RISCV_ISA_EXT_ZBB, 1)
+ :
+ :
+ :
+ : no_zbb);
+
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ asm(".option push \n\
+ .option arch,+zbb \n\
+ not %[fold_temp], %[csum] \n\
+ rori %[csum], %[csum], 16 \n\
+ sub %[csum], %[fold_temp], %[csum] \n\
+ .option pop"
+ : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
+ } else {
+ asm(".option push \n\
+ .option arch,+zbb \n\
+ rori %[fold_temp], %[csum], 32 \n\
+ add %[csum], %[fold_temp], %[csum] \n\
+ srli %[csum], %[csum], 32 \n\
+ not %[fold_temp], %[csum] \n\
+ roriw %[csum], %[csum], 16 \n\
+ subw %[csum], %[fold_temp], %[csum] \n\
+ .option pop"
+ : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
+ }
+ return (__force __sum16)(csum >> 16);
+ }
+no_zbb:
+#ifndef CONFIG_32BIT
+ csum += ror64(csum, 32);
+ csum >>= 32;
+#endif
+ return csum_fold((__force __wsum)csum);
+}
+
+#endif /* __ASM_RISCV_CHECKSUM_H */
diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h
index aa128466c4d4..176b570ef982 100644
--- a/arch/riscv/include/asm/cpu_ops.h
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -13,33 +13,23 @@
/**
* struct cpu_operations - Callback operations for hotplugging CPUs.
*
- * @name: Name of the boot protocol.
- * @cpu_prepare: Early one-time preparation step for a cpu. If there
- * is a mechanism for doing so, tests whether it is
- * possible to boot the given HART.
* @cpu_start: Boots a cpu into the kernel.
- * @cpu_disable: Prepares a cpu to die. May fail for some
- * mechanism-specific reason, which will cause the hot
- * unplug to be aborted. Called from the cpu to be killed.
* @cpu_stop: Makes a cpu leave the kernel. Must not fail. Called from
* the cpu being stopped.
* @cpu_is_stopped: Ensures a cpu has left the kernel. Called from another
* cpu.
*/
struct cpu_operations {
- const char *name;
- int (*cpu_prepare)(unsigned int cpu);
int (*cpu_start)(unsigned int cpu,
struct task_struct *tidle);
#ifdef CONFIG_HOTPLUG_CPU
- int (*cpu_disable)(unsigned int cpu);
void (*cpu_stop)(void);
int (*cpu_is_stopped)(unsigned int cpu);
#endif
};
extern const struct cpu_operations cpu_ops_spinwait;
-extern const struct cpu_operations *cpu_ops[NR_CPUS];
-void __init cpu_set_ops(int cpu);
+extern const struct cpu_operations *cpu_ops;
+void __init cpu_set_ops(void);
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index a418c3112cd6..5a626ed2c47a 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -59,6 +59,8 @@ struct riscv_isa_ext_data {
const unsigned int id;
const char *name;
const char *property;
+ const unsigned int *subset_ext_ids;
+ const unsigned int subset_ext_size;
};
extern const struct riscv_isa_ext_data riscv_isa_ext[];
@@ -67,7 +69,7 @@ extern bool riscv_isa_fallback;
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit);
#define riscv_isa_extension_available(isa_bitmap, ext) \
__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
@@ -133,4 +135,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
}
+DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
#endif
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 306a19a5509c..510014051f5d 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -411,6 +411,15 @@
#define CSR_VTYPE 0xc21
#define CSR_VLENB 0xc22
+/* Scalar Crypto Extension - Entropy */
+#define CSR_SEED 0x015
+#define SEED_OPST_MASK _AC(0xC0000000, UL)
+#define SEED_OPST_BIST _AC(0x00000000, UL)
+#define SEED_OPST_WAIT _AC(0x40000000, UL)
+#define SEED_OPST_ES16 _AC(0x80000000, UL)
+#define SEED_OPST_DEAD _AC(0xC0000000, UL)
+#define SEED_ENTROPY_MASK _AC(0xFFFF, UL)
+
#ifdef CONFIG_RISCV_M_MODE
# define CSR_STATUS CSR_MSTATUS
# define CSR_IE CSR_MIE
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h
index 7ab5e34318c8..2293e535f865 100644
--- a/arch/riscv/include/asm/entry-common.h
+++ b/arch/riscv/include/asm/entry-common.h
@@ -4,6 +4,23 @@
#define _ASM_RISCV_ENTRY_COMMON_H
#include <asm/stacktrace.h>
+#include <asm/thread_info.h>
+#include <asm/vector.h>
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
+ clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
+ /*
+ * We are already called with irq disabled, so go without
+ * keeping track of riscv_v_flags.
+ */
+ riscv_v_vstate_restore(&current->thread.vstate, regs);
+ }
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
void handle_page_fault(struct pt_regs *regs);
void handle_break(struct pt_regs *regs);
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 83ed25e43553..ea33288f8a25 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -24,9 +24,8 @@
#ifdef CONFIG_ERRATA_THEAD
#define ERRATA_THEAD_PBMT 0
-#define ERRATA_THEAD_CMO 1
-#define ERRATA_THEAD_PMU 2
-#define ERRATA_THEAD_NUMBER 3
+#define ERRATA_THEAD_PMU 1
+#define ERRATA_THEAD_NUMBER 2
#endif
#ifdef __ASSEMBLY__
@@ -94,54 +93,17 @@ asm volatile(ALTERNATIVE( \
#define ALT_THEAD_PMA(_val)
#endif
-/*
- * th.dcache.ipa rs1 (invalidate, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- * 0000001 01010 rs1 000 00000 0001011
- * th.dache.iva rs1 (invalida, virtual address)
- * 0000001 00110 rs1 000 00000 0001011
- *
- * th.dcache.cpa rs1 (clean, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- * 0000001 01001 rs1 000 00000 0001011
- * th.dcache.cva rs1 (clean, virtual address)
- * 0000001 00101 rs1 000 00000 0001011
- *
- * th.dcache.cipa rs1 (clean then invalidate, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- * 0000001 01011 rs1 000 00000 0001011
- * th.dcache.civa rs1 (... virtual address)
- * 0000001 00111 rs1 000 00000 0001011
- *
- * th.sync.s (make sure all cache operations finished)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- * 0000000 11001 00000 000 00000 0001011
- */
-#define THEAD_INVAL_A0 ".long 0x0265000b"
-#define THEAD_CLEAN_A0 ".long 0x0255000b"
-#define THEAD_FLUSH_A0 ".long 0x0275000b"
-#define THEAD_SYNC_S ".long 0x0190000b"
-
#define ALT_CMO_OP(_op, _start, _size, _cachesize) \
-asm volatile(ALTERNATIVE_2( \
- __nops(6), \
+asm volatile(ALTERNATIVE( \
+ __nops(5), \
"mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
CBO_##_op(a0) \
"add a0, a0, %0\n\t" \
"2:\n\t" \
- "bltu a0, %2, 3b\n\t" \
- "nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \
- "mv a0, %1\n\t" \
- "j 2f\n\t" \
- "3:\n\t" \
- THEAD_##_op##_A0 "\n\t" \
- "add a0, a0, %0\n\t" \
- "2:\n\t" \
- "bltu a0, %2, 3b\n\t" \
- THEAD_SYNC_S, THEAD_VENDOR_ID, \
- ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \
+ "bltu a0, %2, 3b\n\t", \
+ 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM) \
: : "r"(_cachesize), \
"r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \
"r"((unsigned long)(_start) + (_size)) \
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index 2b2f5df7ef2c..329172122952 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -128,7 +128,23 @@ do { \
struct dyn_ftrace;
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+struct ftrace_ops;
+struct ftrace_regs;
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
+static inline void __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+ regs->t1 = addr;
+}
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+ __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+#endif /* __ASSEMBLY__ */
#endif /* CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 06d30526ef3b..5340f818746b 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -11,19 +11,13 @@
#include <uapi/asm/hwcap.h>
#define RISCV_ISA_EXT_a ('a' - 'a')
-#define RISCV_ISA_EXT_b ('b' - 'a')
#define RISCV_ISA_EXT_c ('c' - 'a')
#define RISCV_ISA_EXT_d ('d' - 'a')
#define RISCV_ISA_EXT_f ('f' - 'a')
#define RISCV_ISA_EXT_h ('h' - 'a')
#define RISCV_ISA_EXT_i ('i' - 'a')
-#define RISCV_ISA_EXT_j ('j' - 'a')
-#define RISCV_ISA_EXT_k ('k' - 'a')
#define RISCV_ISA_EXT_m ('m' - 'a')
-#define RISCV_ISA_EXT_p ('p' - 'a')
#define RISCV_ISA_EXT_q ('q' - 'a')
-#define RISCV_ISA_EXT_s ('s' - 'a')
-#define RISCV_ISA_EXT_u ('u' - 'a')
#define RISCV_ISA_EXT_v ('v' - 'a')
/*
@@ -57,8 +51,38 @@
#define RISCV_ISA_EXT_ZIHPM 42
#define RISCV_ISA_EXT_SMSTATEEN 43
#define RISCV_ISA_EXT_ZICOND 44
+#define RISCV_ISA_EXT_ZBC 45
+#define RISCV_ISA_EXT_ZBKB 46
+#define RISCV_ISA_EXT_ZBKC 47
+#define RISCV_ISA_EXT_ZBKX 48
+#define RISCV_ISA_EXT_ZKND 49
+#define RISCV_ISA_EXT_ZKNE 50
+#define RISCV_ISA_EXT_ZKNH 51
+#define RISCV_ISA_EXT_ZKR 52
+#define RISCV_ISA_EXT_ZKSED 53
+#define RISCV_ISA_EXT_ZKSH 54
+#define RISCV_ISA_EXT_ZKT 55
+#define RISCV_ISA_EXT_ZVBB 56
+#define RISCV_ISA_EXT_ZVBC 57
+#define RISCV_ISA_EXT_ZVKB 58
+#define RISCV_ISA_EXT_ZVKG 59
+#define RISCV_ISA_EXT_ZVKNED 60
+#define RISCV_ISA_EXT_ZVKNHA 61
+#define RISCV_ISA_EXT_ZVKNHB 62
+#define RISCV_ISA_EXT_ZVKSED 63
+#define RISCV_ISA_EXT_ZVKSH 64
+#define RISCV_ISA_EXT_ZVKT 65
+#define RISCV_ISA_EXT_ZFH 66
+#define RISCV_ISA_EXT_ZFHMIN 67
+#define RISCV_ISA_EXT_ZIHINTNTL 68
+#define RISCV_ISA_EXT_ZVFH 69
+#define RISCV_ISA_EXT_ZVFHMIN 70
+#define RISCV_ISA_EXT_ZFA 71
+#define RISCV_ISA_EXT_ZTSO 72
+#define RISCV_ISA_EXT_ZACAS 73
-#define RISCV_ISA_EXT_MAX 64
+#define RISCV_ISA_EXT_MAX 128
+#define RISCV_ISA_EXT_INVALID U32_MAX
#ifdef CONFIG_RISCV_M_MODE
#define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 5c48f48e79a6..630507dff5ea 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -15,4 +15,28 @@ static inline bool riscv_hwprobe_key_is_valid(__s64 key)
return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY;
}
+static inline bool hwprobe_key_is_bitmask(__s64 key)
+{
+ switch (key) {
+ case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
+ case RISCV_HWPROBE_KEY_IMA_EXT_0:
+ case RISCV_HWPROBE_KEY_CPUPERF_0:
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
+ struct riscv_hwprobe *other_pair)
+{
+ if (pair->key != other_pair->key)
+ return false;
+
+ if (hwprobe_key_is_bitmask(pair->key))
+ return (pair->value & other_pair->value) == other_pair->value;
+
+ return pair->value == other_pair->value;
+}
+
#endif
diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h
index 0bbffd528096..7388edd88986 100644
--- a/arch/riscv/include/asm/kfence.h
+++ b/arch/riscv/include/asm/kfence.h
@@ -18,9 +18,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
pte_t *pte = virt_to_kpte(addr);
if (protect)
- set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+ set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT));
else
- set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+ set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT));
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 0eefd9c991ae..484d04a92fa6 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -41,6 +41,7 @@
KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HFENCE \
KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6)
enum kvm_riscv_hfence_type {
KVM_RISCV_HFENCE_UNKNOWN = 0,
@@ -262,13 +263,17 @@ struct kvm_vcpu_arch {
/* 'static' configurations which are set only once */
struct kvm_vcpu_config cfg;
+
+ /* SBI steal-time accounting */
+ struct {
+ gpa_t shmem;
+ u64 last_steal;
+ } sta;
};
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-
#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
@@ -372,4 +377,7 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
+void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
+
#endif /* __RISCV_KVM_HOST_H__ */
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index 6a453f7f8b56..b96705258cf9 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -15,9 +15,10 @@
#define KVM_SBI_VERSION_MINOR 0
enum kvm_riscv_sbi_ext_status {
- KVM_RISCV_SBI_EXT_UNINITIALIZED,
- KVM_RISCV_SBI_EXT_AVAILABLE,
- KVM_RISCV_SBI_EXT_UNAVAILABLE,
+ KVM_RISCV_SBI_EXT_STATUS_UNINITIALIZED,
+ KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE,
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED,
+ KVM_RISCV_SBI_EXT_STATUS_DISABLED,
};
struct kvm_vcpu_sbi_context {
@@ -36,7 +37,7 @@ struct kvm_vcpu_sbi_extension {
unsigned long extid_start;
unsigned long extid_end;
- bool default_unavail;
+ bool default_disabled;
/**
* SBI extension handler. It can be defined for a given extension or group of
@@ -59,11 +60,21 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg);
int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
struct kvm_vcpu *vcpu, unsigned long extid);
+bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx);
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run);
void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num,
+ unsigned long *reg_val);
+int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long reg_num,
+ unsigned long reg_val);
+
#ifdef CONFIG_RISCV_SBI_V01
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
#endif
@@ -74,6 +85,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
diff --git a/arch/riscv/include/asm/paravirt.h b/arch/riscv/include/asm/paravirt.h
new file mode 100644
index 000000000000..c0abde70fc2c
--- /dev/null
+++ b/arch/riscv/include/asm/paravirt.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_PARAVIRT_H
+#define _ASM_RISCV_PARAVIRT_H
+
+#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 dummy_steal_clock(int cpu);
+
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return static_call(pv_steal_clock)(cpu);
+}
+
+int __init pv_time_init(void);
+
+#else
+
+#define pv_time_init() do {} while (0)
+
+#endif /* CONFIG_PARAVIRT */
+#endif /* _ASM_RISCV_PARAVIRT_H */
diff --git a/arch/riscv/include/asm/paravirt_api_clock.h b/arch/riscv/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/riscv/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 9a2c780a11e9..b42017d76924 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -202,7 +202,7 @@ static inline int pud_user(pud_t pud)
static inline void set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
}
static inline void pud_clear(pud_t *pudp)
@@ -278,7 +278,7 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
- *p4dp = p4d;
+ WRITE_ONCE(*p4dp, p4d);
else
set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
}
@@ -340,18 +340,12 @@ static inline struct page *p4d_page(p4d_t p4d)
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
#define pud_offset pud_offset
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
-{
- if (pgtable_l4_enabled)
- return p4d_pgtable(*p4d) + pud_index(address);
-
- return (pud_t *)p4d;
-}
+pud_t *pud_offset(p4d_t *p4d, unsigned long address);
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
if (pgtable_l5_enabled)
- *pgdp = pgd;
+ WRITE_ONCE(*pgdp, pgd);
else
set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
}
@@ -404,12 +398,6 @@ static inline struct page *pgd_page(pgd_t pgd)
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
#define p4d_offset p4d_offset
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
-{
- if (pgtable_l5_enabled)
- return pgd_pgtable(*pgd) + p4d_index(address);
-
- return (p4d_t *)pgd;
-}
+p4d_t *p4d_offset(pgd_t *pgd, unsigned long address);
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7b4287f36054..0c94260b5d0c 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -248,7 +248,7 @@ static inline int pmd_leaf(pmd_t pmd)
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -510,7 +510,7 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
*/
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
- *ptep = pteval;
+ WRITE_ONCE(*ptep, pteval);
}
void flush_icache_pte(pte_t pte);
@@ -544,19 +544,12 @@ static inline void pte_clear(struct mm_struct *mm,
__set_pte_at(ptep, __pte(0));
}
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-static inline int ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep,
- pte_t entry, int dirty)
-{
- if (!pte_same(*ptep, entry))
- __set_pte_at(ptep, entry);
- /*
- * update_mmu_cache will unconditionally execute, handling both
- * the case that the PTE changed and the spurious fault case.
- */
- return true;
-}
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep, pte_t entry, int dirty);
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */
+extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep);
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
@@ -569,16 +562,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address,
- pte_t *ptep)
-{
- if (!pte_young(*ptep))
- return 0;
- return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
-}
-
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
@@ -882,7 +865,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
#ifdef CONFIG_COMPAT
-#define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE)
+#define TASK_SIZE_32 (_AC(0x80000000, UL))
#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
#else
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f19f861cda54..a8509cc31ab2 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -16,7 +16,7 @@
#ifdef CONFIG_64BIT
#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1))
-#define STACK_TOP_MAX TASK_SIZE_64
+#define STACK_TOP_MAX TASK_SIZE
#define arch_get_mmap_end(addr, len, flags) \
({ \
@@ -73,6 +73,43 @@
struct task_struct;
struct pt_regs;
+/*
+ * We use a flag to track in-kernel Vector context. Currently the flag has the
+ * following meaning:
+ *
+ * - bit 0: indicates whether the in-kernel Vector context is active. The
+ * activation of this state disables preemption. On a non-RT kernel, it
+ * also disables bh.
+ * - bit 8: is used for tracking preemptible kernel-mode Vector, when
+ * RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
+ * disable preemption if the thread's kernel_vstate.datap is allocated.
+ * Instead, the kernel sets this bit. Then the trap entry/exit code
+ * knows if we are entering/exiting the context that owns preempt_v.
+ * - 0: the task is not using preempt_v
+ * - 1: the task is actively using preempt_v. Whether the task owns
+ * the preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK.
+ * - bits 16-23 are RISCV_V_CTX_DEPTH_MASK, used by the context-tracking
+ * routine when preempt_v starts:
+ * - 0: the task is actively using, and owns, the preempt_v context.
+ * - non-zero: the task was using preempt_v, but then took a trap within.
+ * Thus, the task does not own preempt_v. Any use of Vector will have to
+ * save preempt_v, if dirty, and fall back to non-preemptible kernel-mode
+ * Vector.
+ * - bit 30: the in-kernel preempt_v context is saved, and needs to be
+ * restored when returning to the context that owns the preempt_v.
+ * - bit 31: the in-kernel preempt_v context is dirty, as signaled by the
+ * trap entry code. Any context switch out of the current task needs to
+ * save it to the task's in-kernel V context. Also, any trap nesting on
+ * top of preempt_v that requests to use V needs a save.
+ */
+#define RISCV_V_CTX_DEPTH_MASK 0x00ff0000
+
+#define RISCV_V_CTX_UNIT_DEPTH 0x00010000
+#define RISCV_KERNEL_MODE_V 0x00000001
+#define RISCV_PREEMPT_V 0x00000100
+#define RISCV_PREEMPT_V_DIRTY 0x80000000
+#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000
+
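Editor's sketch (not part of the patch) of how the flag bits documented above combine to answer whether a task currently owns the preemptible kernel-mode Vector context, following the rules in the comment:

static inline bool riscv_preempt_v_owned(struct task_struct *task)
{
	u32 flags = task->thread.riscv_v_flags;

	/* Actively using preempt_v, and no nested trap recorded in the depth field. */
	return (flags & RISCV_PREEMPT_V) && !(flags & RISCV_V_CTX_DEPTH_MASK);
}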
/* CPU-specific state of a task */
struct thread_struct {
/* Callee-saved registers */
@@ -81,9 +118,11 @@ struct thread_struct {
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
unsigned long bad_cause;
- unsigned long vstate_ctrl;
+ u32 riscv_v_flags;
+ u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
+ struct __riscv_v_ext_state kernel_vstate;
};
/* Whitelist the fstate from the task_struct for hardened usercopy */
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 0892f4421bc4..6e68f8dff76b 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -29,8 +29,10 @@ enum sbi_ext_id {
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
SBI_EXT_SRST = 0x53525354,
+ SBI_EXT_SUSP = 0x53555350,
SBI_EXT_PMU = 0x504D55,
SBI_EXT_DBCN = 0x4442434E,
+ SBI_EXT_STA = 0x535441,
/* Experimentals extensions must lie within this range */
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -114,6 +116,14 @@ enum sbi_srst_reset_reason {
SBI_SRST_RESET_REASON_SYS_FAILURE,
};
+enum sbi_ext_susp_fid {
+ SBI_EXT_SUSP_SYSTEM_SUSPEND = 0,
+};
+
+enum sbi_ext_susp_sleep_type {
+ SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM = 0,
+};
+
enum sbi_ext_pmu_fid {
SBI_EXT_PMU_NUM_COUNTERS = 0,
SBI_EXT_PMU_COUNTER_GET_INFO,
@@ -243,6 +253,22 @@ enum sbi_ext_dbcn_fid {
SBI_EXT_DBCN_CONSOLE_WRITE_BYTE = 2,
};
+/* SBI STA (steal-time accounting) extension */
+enum sbi_ext_sta_fid {
+ SBI_EXT_STA_STEAL_TIME_SET_SHMEM = 0,
+};
+
+struct sbi_sta_struct {
+ __le32 sequence;
+ __le32 flags;
+ __le64 steal;
+ u8 preempted;
+ u8 pad[47];
+} __packed;
+
+#define SBI_STA_SHMEM_DISABLE -1
+
+/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
@@ -271,8 +297,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
+#ifdef CONFIG_RISCV_SBI_V01
void sbi_console_putchar(int ch);
int sbi_console_getchar(void);
+#else
+static inline void sbi_console_putchar(int ch) { }
+static inline int sbi_console_getchar(void) { return -ENOENT; }
+#endif
long sbi_get_mvendorid(void);
long sbi_get_marchid(void);
long sbi_get_mimpid(void);
@@ -329,6 +360,11 @@ static inline unsigned long sbi_mk_version(unsigned long major,
}
int sbi_err_map_linux_errno(int err);
+
+extern bool sbi_debug_console_available;
+int sbi_debug_console_write(const char *bytes, unsigned int num_bytes);
+int sbi_debug_console_read(char *bytes, unsigned int num_bytes);
+
#else /* CONFIG_RISCV_SBI */
static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
static inline void sbi_init(void) {}
diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
index 32336e8a17cb..a393d5035c54 100644
--- a/arch/riscv/include/asm/sections.h
+++ b/arch/riscv/include/asm/sections.h
@@ -13,6 +13,7 @@ extern char _start_kernel[];
extern char __init_data_begin[], __init_data_end[];
extern char __init_text_begin[], __init_text_end[];
extern char __alt_start[], __alt_end[];
+extern char __exittext_begin[], __exittext_end[];
static inline bool is_va_kernel_text(uintptr_t va)
{
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
new file mode 100644
index 000000000000..54efbf523d49
--- /dev/null
+++ b/arch/riscv/include/asm/simd.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+
+#include <asm/vector.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+/*
+ * may_use_simd - whether it is allowable at this time to issue vector
+ * instructions or access the vector register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+ /*
+ * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
+ * and is clear whenever preemption is enabled.
+ */
+ if (in_hardirq() || in_nmi())
+ return false;
+
+ /*
+ * Nesting is achieved in preempt_v by spreading the control for
+ * preemptible and non-preemptible kernel-mode Vector into two fields.
+ * Always try to match with preempt_v if a kernel V-context exists. Then,
+ * fall back to the non-preempt_v check if nesting happens, or if the
+ * config is not set.
+ */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
+ if (!riscv_preempt_v_started(current))
+ return true;
+ }
+ /*
+ * Non-preemptible kernel-mode Vector temporarily disables bh. So we
+ * must not return true on irqs_disabled(). Otherwise we would fail the
+ * lockdep check when calling local_bh_enable()
+ */
+ return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V);
+}
+
+#else /* ! CONFIG_RISCV_ISA_V */
+
+static __must_check inline bool may_use_simd(void)
+{
+ return false;
+}
+
+#endif /* ! CONFIG_RISCV_ISA_V */
+
+#endif
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index f90d8e42f3c7..7efdb0584d47 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev,
struct pt_regs *regs;
regs = task_pt_regs(prev);
- if (unlikely(regs->status & SR_SD))
- fstate_save(prev, regs);
+ fstate_save(prev, regs);
fstate_restore(next, task_pt_regs(next));
}
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 574779900bfb..5d473343634b 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -28,7 +28,6 @@
#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER)
#define OVERFLOW_STACK_SIZE SZ_4K
-#define SHADOW_OVERFLOW_STACK_SIZE (1024)
#define IRQ_STACK_SIZE THREAD_SIZE
@@ -103,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
#define TIF_32BIT 11 /* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returning to user */
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
#define _TIF_WORK_MASK \
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
diff --git a/arch/riscv/include/asm/tlbbatch.h b/arch/riscv/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..46014f70b9da
--- /dev/null
+++ b/arch/riscv/include/asm/tlbbatch.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+#ifndef _ASM_RISCV_TLBBATCH_H
+#define _ASM_RISCV_TLBBATCH_H
+
+#include <linux/cpumask.h>
+
+struct arch_tlbflush_unmap_batch {
+ struct cpumask cpumask;
+};
+
+#endif /* _ASM_RISCV_TLBBATCH_H */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 8f3418c5f172..928f096dca21 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -41,11 +41,20 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
#endif
+
+bool arch_tlbbatch_should_defer(struct mm_struct *mm);
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr);
+void arch_flush_tlb_batched_pending(struct mm_struct *mm);
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+
#else /* CONFIG_SMP && CONFIG_MMU */
#define flush_tlb_all() local_flush_tlb_all()
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 87aaef656257..0cd6f0a027d1 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -22,6 +22,18 @@
extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
bool riscv_v_first_use_handler(struct pt_regs *regs);
+void kernel_vector_begin(void);
+void kernel_vector_end(void);
+void get_cpu_vector_context(void);
+void put_cpu_vector_context(void);
+void riscv_v_thread_free(struct task_struct *tsk);
+void __init riscv_v_setup_ctx_cache(void);
+void riscv_v_thread_alloc(struct task_struct *tsk);
+
+static inline u32 riscv_v_flags(void)
+{
+ return READ_ONCE(current->thread.riscv_v_flags);
+}
static __always_inline bool has_vector(void)
{
@@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs)
__riscv_v_vstate_dirty(regs);
}
-static inline void riscv_v_vstate_save(struct task_struct *task,
+static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) == SR_VS_DIRTY) {
- struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
__riscv_v_vstate_save(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
}
-static inline void riscv_v_vstate_restore(struct task_struct *task,
+static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
struct pt_regs *regs)
{
if ((regs->status & SR_VS) != SR_VS_OFF) {
- struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
__riscv_v_vstate_restore(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
}
}
+static inline void riscv_v_vstate_set_restore(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if ((regs->status & SR_VS) != SR_VS_OFF) {
+ set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
+ riscv_v_vstate_on(regs);
+ }
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline bool riscv_preempt_v_dirty(struct task_struct *task)
+{
+ return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY);
+}
+
+static inline bool riscv_preempt_v_restore(struct task_struct *task)
+{
+ return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
+{
+ barrier();
+ task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_set_restore(struct task_struct *task)
+{
+ barrier();
+ task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
+}
+
+static inline bool riscv_preempt_v_started(struct task_struct *task)
+{
+ return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V);
+}
+
+#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
+static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
+#define riscv_preempt_v_clear_dirty(tsk) do {} while (0)
+#define riscv_preempt_v_set_restore(tsk) do {} while (0)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
static inline void __switch_to_vector(struct task_struct *prev,
struct task_struct *next)
{
struct pt_regs *regs;
- regs = task_pt_regs(prev);
- riscv_v_vstate_save(prev, regs);
- riscv_v_vstate_restore(next, task_pt_regs(next));
+ if (riscv_preempt_v_started(prev)) {
+ if (riscv_preempt_v_dirty(prev)) {
+ __riscv_v_vstate_save(&prev->thread.kernel_vstate,
+ prev->thread.kernel_vstate.datap);
+ riscv_preempt_v_clear_dirty(prev);
+ }
+ } else {
+ regs = task_pt_regs(prev);
+ riscv_v_vstate_save(&prev->thread.vstate, regs);
+ }
+
+ if (riscv_preempt_v_started(next))
+ riscv_preempt_v_set_restore(next);
+ else
+ riscv_v_vstate_set_restore(next, task_pt_regs(next));
}
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
@@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
#define riscv_v_vsize (0)
#define riscv_v_vstate_discard(regs) do {} while (0)
-#define riscv_v_vstate_save(task, regs) do {} while (0)
-#define riscv_v_vstate_restore(task, regs) do {} while (0)
+#define riscv_v_vstate_save(vstate, regs) do {} while (0)
+#define riscv_v_vstate_restore(vstate, regs) do {} while (0)
#define __switch_to_vector(__prev, __next) do {} while (0)
#define riscv_v_vstate_off(regs) do {} while (0)
#define riscv_v_vstate_on(regs) do {} while (0)
+#define riscv_v_thread_free(tsk) do {} while (0)
+#define riscv_v_setup_ctx_cache() do {} while (0)
+#define riscv_v_thread_alloc(tsk) do {} while (0)
#endif /* CONFIG_RISCV_ISA_V */
diff --git a/arch/riscv/include/asm/word-at-a-time.h b/arch/riscv/include/asm/word-at-a-time.h
index 7c086ac6ecd4..f3f031e34191 100644
--- a/arch/riscv/include/asm/word-at-a-time.h
+++ b/arch/riscv/include/asm/word-at-a-time.h
@@ -9,6 +9,7 @@
#define _ASM_RISCV_WORD_AT_A_TIME_H
+#include <asm/asm-extable.h>
#include <linux/kernel.h>
struct word_at_a_time {
@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long ret;
+
+ /* Load word from unaligned pointer addr */
+ asm(
+ "1: " REG_L " %0, %2\n"
+ "2:\n"
+ _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
+ : "=&r" (ret)
+ : "r" (addr), "m" (*(unsigned long *)addr));
+
+ return ret;
+}
+
+#endif /* CONFIG_DCACHE_WORD_ACCESS */
+
#endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
diff --git a/arch/riscv/include/asm/xip_fixup.h b/arch/riscv/include/asm/xip_fixup.h
index d4ffc3c37649..b65bf6306f69 100644
--- a/arch/riscv/include/asm/xip_fixup.h
+++ b/arch/riscv/include/asm/xip_fixup.h
@@ -13,7 +13,7 @@
add \reg, \reg, t0
.endm
.macro XIP_FIXUP_FLASH_OFFSET reg
- la t1, __data_loc
+ la t0, __data_loc
REG_L t1, _xip_phys_offset
sub \reg, \reg, t1
add \reg, \reg, t0
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..96011861e46b
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/asm-prototypes.h>
+
+static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2)
+{
+ kernel_vector_begin();
+ xor_regs_2_(bytes, p1, p2);
+ kernel_vector_end();
+}
+
+static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3)
+{
+ kernel_vector_begin();
+ xor_regs_3_(bytes, p1, p2, p3);
+ kernel_vector_end();
+}
+
+static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4)
+{
+ kernel_vector_begin();
+ xor_regs_4_(bytes, p1, p2, p3, p4);
+ kernel_vector_end();
+}
+
+static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
+ const unsigned long *__restrict p2,
+ const unsigned long *__restrict p3,
+ const unsigned long *__restrict p4,
+ const unsigned long *__restrict p5)
+{
+ kernel_vector_begin();
+ xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+ kernel_vector_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+ .name = "rvv",
+ .do_2 = xor_vector_2,
+ .do_3 = xor_vector_3,
+ .do_4 = xor_vector_4,
+ .do_5 = xor_vector_5
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (has_vector()) { \
+ xor_speed(&xor_block_rvv);\
+ } \
+ } while (0)
+#endif
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index b659ffcfcdb4..9f2a8e3ff204 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -30,6 +30,35 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
#define RISCV_HWPROBE_EXT_ZBS (1 << 5)
#define RISCV_HWPROBE_EXT_ZICBOZ (1 << 6)
+#define RISCV_HWPROBE_EXT_ZBC (1 << 7)
+#define RISCV_HWPROBE_EXT_ZBKB (1 << 8)
+#define RISCV_HWPROBE_EXT_ZBKC (1 << 9)
+#define RISCV_HWPROBE_EXT_ZBKX (1 << 10)
+#define RISCV_HWPROBE_EXT_ZKND (1 << 11)
+#define RISCV_HWPROBE_EXT_ZKNE (1 << 12)
+#define RISCV_HWPROBE_EXT_ZKNH (1 << 13)
+#define RISCV_HWPROBE_EXT_ZKSED (1 << 14)
+#define RISCV_HWPROBE_EXT_ZKSH (1 << 15)
+#define RISCV_HWPROBE_EXT_ZKT (1 << 16)
+#define RISCV_HWPROBE_EXT_ZVBB (1 << 17)
+#define RISCV_HWPROBE_EXT_ZVBC (1 << 18)
+#define RISCV_HWPROBE_EXT_ZVKB (1 << 19)
+#define RISCV_HWPROBE_EXT_ZVKG (1 << 20)
+#define RISCV_HWPROBE_EXT_ZVKNED (1 << 21)
+#define RISCV_HWPROBE_EXT_ZVKNHA (1 << 22)
+#define RISCV_HWPROBE_EXT_ZVKNHB (1 << 23)
+#define RISCV_HWPROBE_EXT_ZVKSED (1 << 24)
+#define RISCV_HWPROBE_EXT_ZVKSH (1 << 25)
+#define RISCV_HWPROBE_EXT_ZVKT (1 << 26)
+#define RISCV_HWPROBE_EXT_ZFH (1 << 27)
+#define RISCV_HWPROBE_EXT_ZFHMIN (1 << 28)
+#define RISCV_HWPROBE_EXT_ZIHINTNTL (1 << 29)
+#define RISCV_HWPROBE_EXT_ZVFH (1 << 30)
+#define RISCV_HWPROBE_EXT_ZVFHMIN (1 << 31)
+#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32)
+#define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33)
+#define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34)
+#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
@@ -40,4 +69,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
+/* Flags */
+#define RISCV_HWPROBE_WHICH_CPUS (1 << 0)
+
#endif
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 60d3b21dead7..d6b7a5b95874 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -157,9 +157,16 @@ enum KVM_RISCV_SBI_EXT_ID {
KVM_RISCV_SBI_EXT_EXPERIMENTAL,
KVM_RISCV_SBI_EXT_VENDOR,
KVM_RISCV_SBI_EXT_DBCN,
+ KVM_RISCV_SBI_EXT_STA,
KVM_RISCV_SBI_EXT_MAX,
};
+/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_sbi_sta {
+ unsigned long shmem_lo;
+ unsigned long shmem_hi;
+};
+
/* Possible states for kvm_riscv_timer */
#define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1
@@ -241,6 +248,12 @@ enum KVM_RISCV_SBI_EXT_ID {
#define KVM_REG_RISCV_VECTOR_REG(n) \
((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long))
+/* Registers for specific SBI extensions are mapped as type 10 */
+#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_SBI_STA_REG(name) \
+ (offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long))
+
/* Device Control API: RISC-V AIA */
#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000
#define KVM_DEV_RISCV_APLIC_SIZE 0x4000
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 82940b6a79a2..f71910718053 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -50,6 +50,7 @@ obj-y += setup.o
obj-y += signal.o
obj-y += syscall_table.o
obj-y += sys_riscv.o
+obj-y += sys_hwprobe.o
obj-y += time.o
obj-y += traps.o
obj-y += riscv_ksyms.o
@@ -63,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
+obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
@@ -85,6 +87,7 @@ obj-$(CONFIG_SMP) += sbi-ipi.o
obj-$(CONFIG_SMP) += cpu_ops_sbi.o
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index 457a18efcb11..28b58fc5ad19 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -18,7 +18,7 @@
bool cpu_has_hotplug(unsigned int cpu)
{
- if (cpu_ops[cpu]->cpu_stop)
+ if (cpu_ops->cpu_stop)
return true;
return false;
@@ -29,25 +29,18 @@ bool cpu_has_hotplug(unsigned int cpu)
*/
int __cpu_disable(void)
{
- int ret = 0;
unsigned int cpu = smp_processor_id();
- if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_stop)
+ if (!cpu_ops->cpu_stop)
return -EOPNOTSUPP;
- if (cpu_ops[cpu]->cpu_disable)
- ret = cpu_ops[cpu]->cpu_disable(cpu);
-
- if (ret)
- return ret;
-
remove_cpu_topology(cpu);
numa_remove_cpu(cpu);
set_cpu_online(cpu, false);
riscv_ipi_disable();
irq_migrate_all_off_this_cpu();
- return ret;
+ return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -62,8 +55,8 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
pr_notice("CPU%u: off\n", cpu);
/* Verify from the firmware if the cpu is really stopped*/
- if (cpu_ops[cpu]->cpu_is_stopped)
- ret = cpu_ops[cpu]->cpu_is_stopped(cpu);
+ if (cpu_ops->cpu_is_stopped)
+ ret = cpu_ops->cpu_is_stopped(cpu);
if (ret)
pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
}
@@ -77,7 +70,7 @@ void __noreturn arch_cpu_idle_dead(void)
cpuhp_ap_report_dead();
- cpu_ops[smp_processor_id()]->cpu_stop();
+ cpu_ops->cpu_stop();
/* It should never reach here */
BUG();
}
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index eb479a88a954..6a8bd8f4db07 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -13,25 +13,21 @@
#include <asm/sbi.h>
#include <asm/smp.h>
-const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
+const struct cpu_operations *cpu_ops __ro_after_init = &cpu_ops_spinwait;
extern const struct cpu_operations cpu_ops_sbi;
#ifndef CONFIG_RISCV_BOOT_SPINWAIT
const struct cpu_operations cpu_ops_spinwait = {
- .name = "",
- .cpu_prepare = NULL,
.cpu_start = NULL,
};
#endif
-void __init cpu_set_ops(int cpuid)
+void __init cpu_set_ops(void)
{
#if IS_ENABLED(CONFIG_RISCV_SBI)
if (sbi_probe_extension(SBI_EXT_HSM)) {
- if (!cpuid)
- pr_info("SBI HSM extension detected\n");
- cpu_ops[cpuid] = &cpu_ops_sbi;
- } else
+ pr_info("SBI HSM extension detected\n");
+ cpu_ops = &cpu_ops_sbi;
+ }
#endif
- cpu_ops[cpuid] = &cpu_ops_spinwait;
}
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index efa0f0816634..1cc7df740edd 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -79,23 +79,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
return sbi_hsm_hart_start(hartid, boot_addr, hsm_data);
}
-static int sbi_cpu_prepare(unsigned int cpuid)
-{
- if (!cpu_ops_sbi.cpu_start) {
- pr_err("cpu start method not defined for CPU [%d]\n", cpuid);
- return -ENODEV;
- }
- return 0;
-}
-
#ifdef CONFIG_HOTPLUG_CPU
-static int sbi_cpu_disable(unsigned int cpuid)
-{
- if (!cpu_ops_sbi.cpu_stop)
- return -EOPNOTSUPP;
- return 0;
-}
-
static void sbi_cpu_stop(void)
{
int ret;
@@ -118,11 +102,8 @@ static int sbi_cpu_is_stopped(unsigned int cpuid)
#endif
const struct cpu_operations cpu_ops_sbi = {
- .name = "sbi",
- .cpu_prepare = sbi_cpu_prepare,
.cpu_start = sbi_cpu_start,
#ifdef CONFIG_HOTPLUG_CPU
- .cpu_disable = sbi_cpu_disable,
.cpu_stop = sbi_cpu_stop,
.cpu_is_stopped = sbi_cpu_is_stopped,
#endif
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index d98d19226b5f..613872b0a21a 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -39,15 +39,6 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
}
-static int spinwait_cpu_prepare(unsigned int cpuid)
-{
- if (!cpu_ops_spinwait.cpu_start) {
- pr_err("cpu start method not defined for CPU [%d]\n", cpuid);
- return -ENODEV;
- }
- return 0;
-}
-
static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
{
/*
@@ -64,7 +55,5 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
}
const struct cpu_operations cpu_ops_spinwait = {
- .name = "spinwait",
- .cpu_prepare = spinwait_cpu_prepare,
.cpu_start = spinwait_cpu_start,
};
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index b3785ffc1570..89920f84d0a3 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -8,8 +8,10 @@
#include <linux/acpi.h>
#include <linux/bitmap.h>
+#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/ctype.h>
+#include <linux/jump_label.h>
#include <linux/log2.h>
#include <linux/memory.h>
#include <linux/module.h>
@@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
/* Performance information */
DEFINE_PER_CPU(long, misaligned_access_speed);
+static cpumask_t fast_misaligned_access;
+
/**
* riscv_isa_extension_base() - Get base extension word
*
@@ -70,7 +74,7 @@ EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
*
* NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
*/
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit)
{
const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
@@ -102,17 +106,101 @@ static bool riscv_isa_extension_check(int id)
return false;
}
return true;
+ case RISCV_ISA_EXT_INVALID:
+ return false;
}
return true;
}
-#define __RISCV_ISA_EXT_DATA(_name, _id) { \
- .name = #_name, \
- .property = #_name, \
- .id = _id, \
+#define _RISCV_ISA_EXT_DATA(_name, _id, _subset_exts, _subset_exts_size) { \
+ .name = #_name, \
+ .property = #_name, \
+ .id = _id, \
+ .subset_ext_ids = _subset_exts, \
+ .subset_ext_size = _subset_exts_size \
}
+#define __RISCV_ISA_EXT_DATA(_name, _id) _RISCV_ISA_EXT_DATA(_name, _id, NULL, 0)
+
+/* Used to declare pure "lasso" extension (Zk for instance) */
+#define __RISCV_ISA_EXT_BUNDLE(_name, _bundled_exts) \
+ _RISCV_ISA_EXT_DATA(_name, RISCV_ISA_EXT_INVALID, _bundled_exts, ARRAY_SIZE(_bundled_exts))
+
+/* Used to declare extensions that are a superset of other extensions (Zvbb for instance) */
+#define __RISCV_ISA_EXT_SUPERSET(_name, _id, _sub_exts) \
+ _RISCV_ISA_EXT_DATA(_name, _id, _sub_exts, ARRAY_SIZE(_sub_exts))
+
+static const unsigned int riscv_zk_bundled_exts[] = {
+ RISCV_ISA_EXT_ZBKB,
+ RISCV_ISA_EXT_ZBKC,
+ RISCV_ISA_EXT_ZBKX,
+ RISCV_ISA_EXT_ZKND,
+ RISCV_ISA_EXT_ZKNE,
+ RISCV_ISA_EXT_ZKR,
+ RISCV_ISA_EXT_ZKT,
+};
+
+static const unsigned int riscv_zkn_bundled_exts[] = {
+ RISCV_ISA_EXT_ZBKB,
+ RISCV_ISA_EXT_ZBKC,
+ RISCV_ISA_EXT_ZBKX,
+ RISCV_ISA_EXT_ZKND,
+ RISCV_ISA_EXT_ZKNE,
+ RISCV_ISA_EXT_ZKNH,
+};
+
+static const unsigned int riscv_zks_bundled_exts[] = {
+ RISCV_ISA_EXT_ZBKB,
+ RISCV_ISA_EXT_ZBKC,
+ RISCV_ISA_EXT_ZKSED,
+ RISCV_ISA_EXT_ZKSH
+};
+
+#define RISCV_ISA_EXT_ZVKN \
+ RISCV_ISA_EXT_ZVKNED, \
+ RISCV_ISA_EXT_ZVKNHB, \
+ RISCV_ISA_EXT_ZVKB, \
+ RISCV_ISA_EXT_ZVKT
+
+static const unsigned int riscv_zvkn_bundled_exts[] = {
+ RISCV_ISA_EXT_ZVKN
+};
+
+static const unsigned int riscv_zvknc_bundled_exts[] = {
+ RISCV_ISA_EXT_ZVKN,
+ RISCV_ISA_EXT_ZVBC
+};
+
+static const unsigned int riscv_zvkng_bundled_exts[] = {
+ RISCV_ISA_EXT_ZVKN,
+ RISCV_ISA_EXT_ZVKG
+};
+
+#define RISCV_ISA_EXT_ZVKS \
+ RISCV_ISA_EXT_ZVKSED, \
+ RISCV_ISA_EXT_ZVKSH, \
+ RISCV_ISA_EXT_ZVKB, \
+ RISCV_ISA_EXT_ZVKT
+
+static const unsigned int riscv_zvks_bundled_exts[] = {
+ RISCV_ISA_EXT_ZVKS
+};
+
+static const unsigned int riscv_zvksc_bundled_exts[] = {
+ RISCV_ISA_EXT_ZVKS,
+ RISCV_ISA_EXT_ZVBC
+};
+
+static const unsigned int riscv_zvksg_bundled_exts[] = {
+ RISCV_ISA_EXT_ZVKS,
+ RISCV_ISA_EXT_ZVKG
+};
+
+static const unsigned int riscv_zvbb_exts[] = {
+ RISCV_ISA_EXT_ZVKB
+};
+
/*
* The canonical order of ISA extension names in the ISA string is defined in
* chapter 27 of the unprivileged specification.
@@ -160,10 +248,6 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(d, RISCV_ISA_EXT_d),
__RISCV_ISA_EXT_DATA(q, RISCV_ISA_EXT_q),
__RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c),
- __RISCV_ISA_EXT_DATA(b, RISCV_ISA_EXT_b),
- __RISCV_ISA_EXT_DATA(k, RISCV_ISA_EXT_k),
- __RISCV_ISA_EXT_DATA(j, RISCV_ISA_EXT_j),
- __RISCV_ISA_EXT_DATA(p, RISCV_ISA_EXT_p),
__RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v),
__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
@@ -172,11 +256,49 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND),
__RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR),
__RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI),
+ __RISCV_ISA_EXT_DATA(zihintntl, RISCV_ISA_EXT_ZIHINTNTL),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
+ __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
+ __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),
+ __RISCV_ISA_EXT_DATA(zfh, RISCV_ISA_EXT_ZFH),
+ __RISCV_ISA_EXT_DATA(zfhmin, RISCV_ISA_EXT_ZFHMIN),
__RISCV_ISA_EXT_DATA(zba, RISCV_ISA_EXT_ZBA),
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
+ __RISCV_ISA_EXT_DATA(zbc, RISCV_ISA_EXT_ZBC),
+ __RISCV_ISA_EXT_DATA(zbkb, RISCV_ISA_EXT_ZBKB),
+ __RISCV_ISA_EXT_DATA(zbkc, RISCV_ISA_EXT_ZBKC),
+ __RISCV_ISA_EXT_DATA(zbkx, RISCV_ISA_EXT_ZBKX),
__RISCV_ISA_EXT_DATA(zbs, RISCV_ISA_EXT_ZBS),
+ __RISCV_ISA_EXT_BUNDLE(zk, riscv_zk_bundled_exts),
+ __RISCV_ISA_EXT_BUNDLE(zkn, riscv_zkn_bundled_exts),
+ __RISCV_ISA_EXT_DATA(zknd, RISCV_ISA_EXT_ZKND),
+ __RISCV_ISA_EXT_DATA(zkne, RISCV_ISA_EXT_ZKNE),
+ __RISCV_ISA_EXT_DATA(zknh, RISCV_ISA_EXT_ZKNH),
+ __RISCV_ISA_EXT_DATA(zkr, RISCV_ISA_EXT_ZKR),
+ __RISCV_ISA_EXT_BUNDLE(zks, riscv_zks_bundled_exts),
+ __RISCV_ISA_EXT_DATA(zkt, RISCV_ISA_EXT_ZKT),
+ __RISCV_ISA_EXT_DATA(zksed, RISCV_ISA_EXT_ZKSED),
+ __RISCV_ISA_EXT_DATA(zksh, RISCV_ISA_EXT_ZKSH),
+ __RISCV_ISA_EXT_DATA(ztso, RISCV_ISA_EXT_ZTSO),
+ __RISCV_ISA_EXT_SUPERSET(zvbb, RISCV_ISA_EXT_ZVBB, riscv_zvbb_exts),
+ __RISCV_ISA_EXT_DATA(zvbc, RISCV_ISA_EXT_ZVBC),
+ __RISCV_ISA_EXT_DATA(zvfh, RISCV_ISA_EXT_ZVFH),
+ __RISCV_ISA_EXT_DATA(zvfhmin, RISCV_ISA_EXT_ZVFHMIN),
+ __RISCV_ISA_EXT_DATA(zvkb, RISCV_ISA_EXT_ZVKB),
+ __RISCV_ISA_EXT_DATA(zvkg, RISCV_ISA_EXT_ZVKG),
+ __RISCV_ISA_EXT_BUNDLE(zvkn, riscv_zvkn_bundled_exts),
+ __RISCV_ISA_EXT_BUNDLE(zvknc, riscv_zvknc_bundled_exts),
+ __RISCV_ISA_EXT_DATA(zvkned, RISCV_ISA_EXT_ZVKNED),
+ __RISCV_ISA_EXT_BUNDLE(zvkng, riscv_zvkng_bundled_exts),
+ __RISCV_ISA_EXT_DATA(zvknha, RISCV_ISA_EXT_ZVKNHA),
+ __RISCV_ISA_EXT_DATA(zvknhb, RISCV_ISA_EXT_ZVKNHB),
+ __RISCV_ISA_EXT_BUNDLE(zvks, riscv_zvks_bundled_exts),
+ __RISCV_ISA_EXT_BUNDLE(zvksc, riscv_zvksc_bundled_exts),
+ __RISCV_ISA_EXT_DATA(zvksed, RISCV_ISA_EXT_ZVKSED),
+ __RISCV_ISA_EXT_DATA(zvksh, RISCV_ISA_EXT_ZVKSH),
+ __RISCV_ISA_EXT_BUNDLE(zvksg, riscv_zvksg_bundled_exts),
+ __RISCV_ISA_EXT_DATA(zvkt, RISCV_ISA_EXT_ZVKT),
__RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA),
__RISCV_ISA_EXT_DATA(smstateen, RISCV_ISA_EXT_SMSTATEEN),
__RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA),
@@ -189,6 +311,31 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
+static void __init match_isa_ext(const struct riscv_isa_ext_data *ext, const char *name,
+ const char *name_end, struct riscv_isainfo *isainfo)
+{
+ if ((name_end - name == strlen(ext->name)) &&
+ !strncasecmp(name, ext->name, name_end - name)) {
+ /*
+ * If this is a bundle, enable all the ISA extensions that
+ * comprise the bundle.
+ */
+ if (ext->subset_ext_size) {
+ for (int i = 0; i < ext->subset_ext_size; i++) {
+ if (riscv_isa_extension_check(ext->subset_ext_ids[i]))
+ set_bit(ext->subset_ext_ids[i], isainfo->isa);
+ }
+ }
+
+ /*
+ * This is valid even for bundle extensions, which use the RISCV_ISA_EXT_INVALID id
+ * (rejected by riscv_isa_extension_check()).
+ */
+ if (riscv_isa_extension_check(ext->id))
+ set_bit(ext->id, isainfo->isa);
+ }
+}
+
static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct riscv_isainfo *isainfo,
unsigned long *isa2hwcap, const char *isa)
{
@@ -321,14 +468,6 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
if (*isa == '_')
++isa;
-#define SET_ISA_EXT_MAP(name, bit) \
- do { \
- if ((ext_end - ext == strlen(name)) && \
- !strncasecmp(ext, name, strlen(name)) && \
- riscv_isa_extension_check(bit)) \
- set_bit(bit, isainfo->isa); \
- } while (false) \
-
if (unlikely(ext_err))
continue;
if (!ext_long) {
@@ -340,10 +479,8 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
}
} else {
for (int i = 0; i < riscv_isa_ext_count; i++)
- SET_ISA_EXT_MAP(riscv_isa_ext[i].name,
- riscv_isa_ext[i].id);
+ match_isa_ext(&riscv_isa_ext[i], ext, ext_end, isainfo);
}
-#undef SET_ISA_EXT_MAP
}
}
@@ -442,18 +579,26 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
}
for (int i = 0; i < riscv_isa_ext_count; i++) {
+ const struct riscv_isa_ext_data *ext = &riscv_isa_ext[i];
+
if (of_property_match_string(cpu_node, "riscv,isa-extensions",
- riscv_isa_ext[i].property) < 0)
+ ext->property) < 0)
continue;
- if (!riscv_isa_extension_check(riscv_isa_ext[i].id))
- continue;
+ if (ext->subset_ext_size) {
+ for (int j = 0; j < ext->subset_ext_size; j++) {
+ if (riscv_isa_extension_check(ext->subset_ext_ids[j]))
+ set_bit(ext->subset_ext_ids[j], isainfo->isa);
+ }
+ }
- /* Only single letter extensions get set in hwcap */
- if (strnlen(riscv_isa_ext[i].name, 2) == 1)
- this_hwcap |= isa2hwcap[riscv_isa_ext[i].id];
+ if (riscv_isa_extension_check(ext->id)) {
+ set_bit(ext->id, isainfo->isa);
- set_bit(riscv_isa_ext[i].id, isainfo->isa);
+ /* Only single letter extensions get set in hwcap */
+ if (strnlen(riscv_isa_ext[i].name, 2) == 1)
+ this_hwcap |= isa2hwcap[riscv_isa_ext[i].id];
+ }
}
of_node_put(cpu_node);
@@ -643,6 +788,16 @@ static int check_unaligned_access(void *param)
(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
per_cpu(misaligned_access_speed, cpu) = speed;
+
+ /*
+ * Set the value of fast_misaligned_access of a CPU. These operations
+ * are atomic to avoid race conditions.
+ */
+ if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
+ cpumask_set_cpu(cpu, &fast_misaligned_access);
+ else
+ cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
return 0;
}
@@ -655,13 +810,69 @@ static void check_unaligned_access_nonboot_cpu(void *param)
check_unaligned_access(pages[cpu]);
}
+DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
+static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
+{
+ if (cpumask_weight(mask) == weight)
+ static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
+ else
+ static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
+}
+
+static void set_unaligned_access_static_branches_except_cpu(int cpu)
+{
+ /*
+ * Same as set_unaligned_access_static_branches, except excludes the
+ * given CPU from the result. When a CPU is hotplugged into an offline
+ * state, this function is called before the CPU is set to offline in
+ * the cpumask, and thus the CPU needs to be explicitly excluded.
+ */
+
+ cpumask_t fast_except_me;
+
+ cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
+ cpumask_clear_cpu(cpu, &fast_except_me);
+
+ modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
+}
+
+static void set_unaligned_access_static_branches(void)
+{
+ /*
+ * This will be called after check_unaligned_access_all_cpus so the
+ * result of unaligned access speed for all CPUs will be available.
+ *
+ * To avoid the number of online cpus changing between reading
+ * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+ * held before calling this function.
+ */
+
+ cpumask_t fast_and_online;
+
+ cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
+
+ modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
+}
+
+static int lock_and_set_unaligned_access_static_branch(void)
+{
+ cpus_read_lock();
+ set_unaligned_access_static_branches();
+ cpus_read_unlock();
+
+ return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
static int riscv_online_cpu(unsigned int cpu)
{
static struct page *buf;
/* We are already set since the last check */
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
- return 0;
+ goto exit;
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
@@ -671,6 +882,17 @@ static int riscv_online_cpu(unsigned int cpu)
check_unaligned_access(buf);
__free_pages(buf, MISALIGNED_BUFFER_ORDER);
+
+exit:
+ set_unaligned_access_static_branches();
+
+ return 0;
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+ set_unaligned_access_static_branches_except_cpu(cpu);
+
return 0;
}
@@ -705,9 +927,12 @@ static int check_unaligned_access_all_cpus(void)
/* Check core 0. */
smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
- /* Setup hotplug callback for any new CPUs that come online. */
+ /*
+ * Setup hotplug callbacks for any new CPUs that come online or go
+ * offline.
+ */
cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
- riscv_online_cpu, NULL);
+ riscv_online_cpu, riscv_offline_cpu);
out:
unaligned_emulation_finish();
diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c
index aa6209a74c83..b64bf1624a05 100644
--- a/arch/riscv/kernel/efi.c
+++ b/arch/riscv/kernel/efi.c
@@ -60,7 +60,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = ptep_get(ptep);
unsigned long val;
if (md->attribute & EFI_MEMORY_RO) {
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 54ca4564a926..9d1a305d5508 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception)
/* Load the kernel shadow call stack pointer if coming from userspace */
scs_load_current_if_task_changed s5
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ move a0, sp
+ call riscv_v_context_nesting_start
+#endif
move a0, sp /* pt_regs */
la ra, ret_from_exception
@@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
*/
csrw CSR_SCRATCH, tp
1:
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ move a0, sp
+ call riscv_v_context_nesting_end
+#endif
REG_L a0, PT_STATUS(sp)
/*
* The current load reservation is effectively part of the processor's
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 03a6434a8cdd..f5aa24d9e1c1 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -178,32 +178,28 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
}
#ifdef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+ struct pt_regs *regs = arch_ftrace_get_regs(fregs);
+ unsigned long *parent = (unsigned long *)&regs->ra;
+
+ prepare_ftrace_return(parent, ip, frame_pointer(regs));
+}
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
extern void ftrace_graph_call(void);
-extern void ftrace_graph_regs_call(void);
int ftrace_enable_ftrace_graph_caller(void)
{
- int ret;
-
- ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, true, true);
- if (ret)
- return ret;
-
- return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, true, true);
}
int ftrace_disable_ftrace_graph_caller(void)
{
- int ret;
-
- ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
- (unsigned long)&prepare_ftrace_return, false, true);
- if (ret)
- return ret;
-
- return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
(unsigned long)&prepare_ftrace_return, false, true);
}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 76ace1e0b46f..4236a69c35cb 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,7 +11,6 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/csr.h>
-#include <asm/cpu_ops_sbi.h>
#include <asm/hwcap.h>
#include <asm/image.h>
#include <asm/scs.h>
@@ -89,6 +88,7 @@ relocate_enable_mmu:
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
la a1, satp_mode
+ XIP_FIXUP_OFFSET a1
REG_L a1, 0(a1)
or a2, a2, a1
@@ -265,10 +265,12 @@ SYM_CODE_START(_start_kernel)
la sp, _end + THREAD_SIZE
XIP_FIXUP_OFFSET sp
mv s0, a0
+ mv s1, a1
call __copy_data
- /* Restore a0 copy */
+ /* Restore a0 & a1 copy */
mv a0, s0
+ mv a1, s1
#endif
#ifndef CONFIG_XIP_KERNEL
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
new file mode 100644
index 000000000000..6afe80c7f03a
--- /dev/null
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/simd.h>
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+#include <asm/asm-prototypes.h>
+#endif
+
+static inline void riscv_v_flags_set(u32 flags)
+{
+ WRITE_ONCE(current->thread.riscv_v_flags, flags);
+}
+
+static inline void riscv_v_start(u32 flags)
+{
+ int orig;
+
+ orig = riscv_v_flags();
+ BUG_ON((orig & flags) != 0);
+ riscv_v_flags_set(orig | flags);
+ barrier();
+}
+
+static inline void riscv_v_stop(u32 flags)
+{
+ int orig;
+
+ barrier();
+ orig = riscv_v_flags();
+ BUG_ON((orig & flags) == 0);
+ riscv_v_flags_set(orig & ~flags);
+}
+
+/*
+ * Claim ownership of the CPU vector context for use by the calling context.
+ *
+ * The caller may freely manipulate the vector context metadata until
+ * put_cpu_vector_context() is called.
+ */
+void get_cpu_vector_context(void)
+{
+ /*
+ * disable softirqs so it is impossible for softirqs to nest
+ * get_cpu_vector_context() when kernel is actively using Vector.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
+
+ riscv_v_start(RISCV_KERNEL_MODE_V);
+}
+
+/*
+ * Release the CPU vector context.
+ *
+ * Must be called from a context in which get_cpu_vector_context() was
+ * previously called, with no call to put_cpu_vector_context() in the
+ * meantime.
+ */
+void put_cpu_vector_context(void)
+{
+ riscv_v_stop(RISCV_KERNEL_MODE_V);
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static __always_inline u32 *riscv_v_flags_ptr(void)
+{
+ return &current->thread.riscv_v_flags;
+}
+
+static inline void riscv_preempt_v_set_dirty(void)
+{
+ *riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_reset_flags(void)
+{
+ *riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_v_ctx_depth_inc(void)
+{
+ *riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline void riscv_v_ctx_depth_dec(void)
+{
+ *riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline u32 riscv_v_ctx_get_depth(void)
+{
+ return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK;
+}
+
+static int riscv_v_stop_kernel_context(void)
+{
+ if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_started(current))
+ return 1;
+
+ riscv_preempt_v_clear_dirty(current);
+ riscv_v_stop(RISCV_PREEMPT_V);
+ return 0;
+}
+
+static int riscv_v_start_kernel_context(bool *is_nested)
+{
+ struct __riscv_v_ext_state *kvstate, *uvstate;
+
+ kvstate = &current->thread.kernel_vstate;
+ if (!kvstate->datap)
+ return -ENOENT;
+
+ if (riscv_preempt_v_started(current)) {
+ WARN_ON(riscv_v_ctx_get_depth() == 0);
+ *is_nested = true;
+ get_cpu_vector_context();
+ if (riscv_preempt_v_dirty(current)) {
+ __riscv_v_vstate_save(kvstate, kvstate->datap);
+ riscv_preempt_v_clear_dirty(current);
+ }
+ riscv_preempt_v_set_restore(current);
+ return 0;
+ }
+
+ /* Transfer the ownership of V from user to kernel, then save */
+ riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
+ if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
+ uvstate = &current->thread.vstate;
+ __riscv_v_vstate_save(uvstate, uvstate->datap);
+ }
+ riscv_preempt_v_clear_dirty(current);
+ return 0;
+}
+
+/* low-level V context handling code, called with irq disabled */
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
+{
+ int depth;
+
+ if (!riscv_preempt_v_started(current))
+ return;
+
+ depth = riscv_v_ctx_get_depth();
+ if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
+ riscv_preempt_v_set_dirty();
+
+ riscv_v_ctx_depth_inc();
+}
+
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
+{
+ struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
+ u32 depth;
+
+ WARN_ON(!irqs_disabled());
+
+ if (!riscv_preempt_v_started(current))
+ return;
+
+ riscv_v_ctx_depth_dec();
+ depth = riscv_v_ctx_get_depth();
+ if (depth == 0) {
+ if (riscv_preempt_v_restore(current)) {
+ __riscv_v_vstate_restore(vstate, vstate->datap);
+ __riscv_v_vstate_clean(regs);
+ riscv_preempt_v_reset_flags();
+ }
+ }
+}
+#else
+#define riscv_v_start_kernel_context(nested) (-ENOENT)
+#define riscv_v_stop_kernel_context() (-ENOENT)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+/*
+ * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
+ * context
+ *
+ * Must not be called unless may_use_simd() returns true.
+ * Task context in the vector registers is saved back to memory as necessary.
+ *
+ * A matching call to kernel_vector_end() must be made before returning from the
+ * calling context.
+ *
+ * The caller may freely use the vector registers until kernel_vector_end() is
+ * called.
+ */
+void kernel_vector_begin(void)
+{
+ bool nested = false;
+
+ if (WARN_ON(!has_vector()))
+ return;
+
+ BUG_ON(!may_use_simd());
+
+ if (riscv_v_start_kernel_context(&nested)) {
+ get_cpu_vector_context();
+ riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+ }
+
+ if (!nested)
+ riscv_v_vstate_set_restore(current, task_pt_regs(current));
+
+ riscv_v_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_begin);
+
+/*
+ * kernel_vector_end(): give the CPU vector registers back to the current task
+ *
+ * Must be called from a context in which kernel_vector_begin() was previously
+ * called, with no call to kernel_vector_end() in the meantime.
+ *
+ * The caller must not use the vector registers after this function is called,
+ * unless kernel_vector_begin() is called again in the meantime.
+ */
+void kernel_vector_end(void)
+{
+ if (WARN_ON(!has_vector()))
+ return;
+
+ riscv_v_disable();
+
+ if (riscv_v_stop_kernel_context())
+ put_cpu_vector_context();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_end);
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 58dd96a2a153..b7561288e8da 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -3,12 +3,12 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/csr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
-#include <asm-generic/export.h>
#include <asm/ftrace.h>
.text
@@ -57,31 +57,150 @@
.endm
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- .macro SAVE_ALL
+
+/**
+* SAVE_ABI_REGS - save regs against the pt_regs struct
+*
+* @all: tell if saving all the regs
+*
+* If all is set, all the regs will be saved, otherwise only ABI
+* related regs (a0-a7,epc,ra and optional s0) will be saved.
+*
+* After the stack is established,
+*
+* 0(sp) stores the PC of the traced function which can be accessed
+* by &(fregs)->regs->epc in tracing function. Note that the real
+* function entry address should be computed with -FENTRY_RA_OFFSET.
+*
+* 8(sp) stores the function return address (i.e. parent IP) that
+* can be accessed by &(fregs)->regs->ra in tracing function.
+*
+* The other regs are saved at their respective locations and accessed
+* by the respective pt_regs member.
+*
+* Here is the layout of stack for your reference.
+*
+* PT_SIZE_ON_STACK -> +++++++++
+* + ..... +
+* + t3-t6 +
+* + s2-s11+
+* + a0-a7 + --++++-> ftrace_caller saved
+* + s1 + +
+* + s0 + --+
+* + t0-t2 + +
+* + tp + +
+* + gp + +
+* + sp + +
+* + ra + --+ // parent IP
+* sp -> + epc + --+ // PC
+* +++++++++
+**/
+ .macro SAVE_ABI_REGS, all=0
addi sp, sp, -PT_SIZE_ON_STACK
- REG_S t0, PT_EPC(sp)
- REG_S x1, PT_RA(sp)
- REG_S x2, PT_SP(sp)
- REG_S x3, PT_GP(sp)
- REG_S x4, PT_TP(sp)
- REG_S x5, PT_T0(sp)
- save_from_x6_to_x31
+ REG_S t0, PT_EPC(sp)
+ REG_S x1, PT_RA(sp)
+
+ // save the ABI regs
+
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+
+ // save the leftover regs
+
+ .if \all == 1
+ REG_S x2, PT_SP(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+ REG_S x5, PT_T0(sp)
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x8, PT_S0(sp)
+ REG_S x9, PT_S1(sp)
+ REG_S x18, PT_S2(sp)
+ REG_S x19, PT_S3(sp)
+ REG_S x20, PT_S4(sp)
+ REG_S x21, PT_S5(sp)
+ REG_S x22, PT_S6(sp)
+ REG_S x23, PT_S7(sp)
+ REG_S x24, PT_S8(sp)
+ REG_S x25, PT_S9(sp)
+ REG_S x26, PT_S10(sp)
+ REG_S x27, PT_S11(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
+
+ // save s0 if FP_TEST defined
+
+ .else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ REG_S x8, PT_S0(sp)
+#endif
+ .endif
.endm
- .macro RESTORE_ALL
- REG_L x1, PT_RA(sp)
- REG_L x2, PT_SP(sp)
- REG_L x3, PT_GP(sp)
- REG_L x4, PT_TP(sp)
- /* Restore t0 with PT_EPC */
- REG_L x5, PT_EPC(sp)
- restore_from_x6_to_x31
+ .macro RESTORE_ABI_REGS, all=0
+ REG_L t0, PT_EPC(sp)
+ REG_L x1, PT_RA(sp)
+ REG_L x10, PT_A0(sp)
+ REG_L x11, PT_A1(sp)
+ REG_L x12, PT_A2(sp)
+ REG_L x13, PT_A3(sp)
+ REG_L x14, PT_A4(sp)
+ REG_L x15, PT_A5(sp)
+ REG_L x16, PT_A6(sp)
+ REG_L x17, PT_A7(sp)
+ .if \all == 1
+ REG_L x2, PT_SP(sp)
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+ REG_L x9, PT_S1(sp)
+ REG_L x18, PT_S2(sp)
+ REG_L x19, PT_S3(sp)
+ REG_L x20, PT_S4(sp)
+ REG_L x21, PT_S5(sp)
+ REG_L x22, PT_S6(sp)
+ REG_L x23, PT_S7(sp)
+ REG_L x24, PT_S8(sp)
+ REG_L x25, PT_S9(sp)
+ REG_L x26, PT_S10(sp)
+ REG_L x27, PT_S11(sp)
+ REG_L x28, PT_T3(sp)
+ REG_L x29, PT_T4(sp)
+ REG_L x30, PT_T5(sp)
+ REG_L x31, PT_T6(sp)
+
+ .else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ REG_L x8, PT_S0(sp)
+#endif
+ .endif
addi sp, sp, PT_SIZE_ON_STACK
.endm
+
+ .macro PREPARE_ARGS
+ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+ mv a1, ra
+ mv a3, sp
+ .endm
+
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
SYM_FUNC_START(ftrace_caller)
SAVE_ABI
@@ -105,34 +224,39 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
call ftrace_stub
#endif
RESTORE_ABI
- jr t0
+ jr t0
SYM_FUNC_END(ftrace_caller)
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
SYM_FUNC_START(ftrace_regs_caller)
- SAVE_ALL
-
- addi a0, t0, -FENTRY_RA_OFFSET
- la a1, function_trace_op
- REG_L a2, 0(a1)
- mv a1, ra
- mv a3, sp
+ mv t1, zero
+ SAVE_ABI_REGS 1
+ PREPARE_ARGS
SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
call ftrace_stub
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi a0, sp, PT_RA
- REG_L a1, PT_EPC(sp)
- addi a1, a1, -FENTRY_RA_OFFSET
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- mv a2, s0
-#endif
-SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL)
+ RESTORE_ABI_REGS 1
+ bnez t1, .Ldirect
+ jr t0
+.Ldirect:
+ jr t1
+SYM_FUNC_END(ftrace_regs_caller)
+
+SYM_FUNC_START(ftrace_caller)
+ SAVE_ABI_REGS 0
+ PREPARE_ARGS
+
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
call ftrace_stub
-#endif
- RESTORE_ALL
- jr t0
-SYM_FUNC_END(ftrace_regs_caller)
+ RESTORE_ABI_REGS 0
+ jr t0
+SYM_FUNC_END(ftrace_caller)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_CODE_START(ftrace_stub_direct_tramp)
+ jr t0
+SYM_CODE_END(ftrace_stub_direct_tramp)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index b4dd9ed6849e..d7ec69ac6910 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -4,12 +4,12 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/csr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
-#include <asm-generic/export.h>
#include <asm/ftrace.h>
.text
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index aac019ed63b1..5e5a82644451 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -723,8 +723,8 @@ static int add_relocation_to_accumulate(struct module *me, int type,
if (!bucket) {
kfree(entry);
- kfree(rel_head);
kfree(rel_head->rel_entry);
+ kfree(rel_head);
return -ENOMEM;
}
@@ -747,6 +747,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
{
/* Can safely assume that bits is not greater than sizeof(long) */
unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
+ /*
+ * When hashtable_size == 1, hashtable_bits == 0.
+ * This is valid because the hashing algorithm returns 0 in this case.
+ */
unsigned int hashtable_bits = ilog2(hashtable_size);
/*
@@ -760,10 +764,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
hashtable_size <<= should_double_size;
*relocation_hashtable = kmalloc_array(hashtable_size,
- sizeof(*relocation_hashtable),
+ sizeof(**relocation_hashtable),
GFP_KERNEL);
if (!*relocation_hashtable)
- return -ENOMEM;
+ return 0;
__hash_init(*relocation_hashtable, hashtable_size);
@@ -779,6 +783,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
Elf_Sym *sym;
void *location;
unsigned int i, type;
+ unsigned int j_idx = 0;
Elf_Addr v;
int res;
unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
@@ -789,8 +794,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
hashtable_bits = initialize_relocation_hashtable(num_relocations,
&relocation_hashtable);
- if (hashtable_bits < 0)
- return hashtable_bits;
+ if (!relocation_hashtable)
+ return -ENOMEM;
INIT_LIST_HEAD(&used_buckets_list);
@@ -829,9 +834,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
v = sym->st_value + rel[i].r_addend;
if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) {
- unsigned int j;
+ unsigned int j = j_idx;
+ bool found = false;
- for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) {
+ do {
unsigned long hi20_loc =
sechdrs[sechdrs[relsec].sh_info].sh_addr
+ rel[j].r_offset;
@@ -860,16 +866,26 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
hi20 = (offset + 0x800) & 0xfffff000;
lo12 = offset - hi20;
v = lo12;
+ found = true;
break;
}
- }
- if (j == sechdrs[relsec].sh_size / sizeof(*rel)) {
+
+ j++;
+ if (j > sechdrs[relsec].sh_size / sizeof(*rel))
+ j = 0;
+
+ } while (j_idx != j);
+
+ if (!found) {
pr_err(
"%s: Can not find HI20 relocation information\n",
me->name);
return -EINVAL;
}
+
+ /* Record the previous j-loop end index */
+ j_idx = j;
}
if (reloc_handlers[type].accumulate_handler)
@@ -894,7 +910,8 @@ void *module_alloc(unsigned long size)
{
return __vmalloc_node_range(size, 1, MODULES_VADDR,
MODULES_END, GFP_KERNEL,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
+ PAGE_KERNEL, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE,
__builtin_return_address(0));
}
#endif
diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
new file mode 100644
index 000000000000..8e114f5930ce
--- /dev/null
+++ b/arch/riscv/kernel/paravirt.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "riscv-pv: " fmt
+
+#include <linux/cpuhotplug.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/percpu-defs.h>
+#include <linux/printk.h>
+#include <linux/static_call.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+#include <asm/page.h>
+#include <asm/paravirt.h>
+#include <asm/sbi.h>
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
+DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
+
+static bool __init has_pv_steal_clock(void)
+{
+ if (sbi_spec_version >= sbi_mk_version(2, 0) &&
+ sbi_probe_extension(SBI_EXT_STA) > 0) {
+ pr_info("SBI STA extension detected\n");
+ return true;
+ }
+
+ return false;
+}
+
+static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
+ unsigned long flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
+ lo, hi, flags, 0, 0, 0);
+ if (ret.error) {
+ if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
+ pr_warn("Failed to disable steal-time shmem");
+ else
+ pr_warn("Failed to set steal-time shmem");
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ return 0;
+}
+
+static int pv_time_cpu_online(unsigned int cpu)
+{
+ struct sbi_sta_struct *st = this_cpu_ptr(&steal_time);
+ phys_addr_t pa = __pa(st);
+ unsigned long lo = (unsigned long)pa;
+ unsigned long hi = IS_ENABLED(CONFIG_32BIT) ? upper_32_bits((u64)pa) : 0;
+
+ return sbi_sta_steal_time_set_shmem(lo, hi, 0);
+}
+
+static int pv_time_cpu_down_prepare(unsigned int cpu)
+{
+ return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
+ SBI_STA_SHMEM_DISABLE, 0);
+}
+
+static u64 pv_time_steal_clock(int cpu)
+{
+ struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
+ u32 sequence;
+ u64 steal;
+
+ /*
+ * Check the sequence field before and after reading the steal
+ * field. Repeat the read if it is different or odd.
+ */
+ do {
+ sequence = READ_ONCE(st->sequence);
+ virt_rmb();
+ steal = READ_ONCE(st->steal);
+ virt_rmb();
+ } while ((le32_to_cpu(sequence) & 1) ||
+ sequence != READ_ONCE(st->sequence));
+
+ return le64_to_cpu(steal);
+}
+
+int __init pv_time_init(void)
+{
+ int ret;
+
+ if (!has_pv_steal_clock())
+ return 0;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "riscv/pv_time:online",
+ pv_time_cpu_online,
+ pv_time_cpu_down_prepare);
+ if (ret < 0)
+ return ret;
+
+ static_call_update(pv_steal_clock, pv_time_steal_clock);
+
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+
+ pr_info("Computing paravirt steal-time\n");
+
+ return 0;
+}
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 13ee7bf589a1..37e87fdcf6a0 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -14,6 +14,7 @@
#include <asm/fixmap.h>
#include <asm/ftrace.h>
#include <asm/patch.h>
+#include <asm/sections.h>
struct patch_insn {
void *addr;
@@ -25,6 +26,14 @@ struct patch_insn {
int riscv_patch_in_stop_machine = false;
#ifdef CONFIG_MMU
+
+static inline bool is_kernel_exittext(uintptr_t addr)
+{
+ return system_state < SYSTEM_RUNNING &&
+ addr >= (uintptr_t)__exittext_begin &&
+ addr < (uintptr_t)__exittext_end;
+}
+
/*
* The fix_to_virt(, idx) needs a const value (not a dynamic variable of
* reg-a0) or BUILD_BUG_ON failed with "idx >= __end_of_fixed_addresses".
@@ -35,7 +44,7 @@ static __always_inline void *patch_map(void *addr, const unsigned int fixmap)
uintptr_t uintaddr = (uintptr_t) addr;
struct page *page;
- if (core_kernel_text(uintaddr))
+ if (core_kernel_text(uintaddr) || is_kernel_exittext(uintaddr))
page = phys_to_page(__pa_symbol(addr));
else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
page = vmalloc_to_page(addr);
diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c
index 68e786c84c94..f6d4dedffb84 100644
--- a/arch/riscv/kernel/pi/cmdline_early.c
+++ b/arch/riscv/kernel/pi/cmdline_early.c
@@ -38,8 +38,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa)
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) {
- strncat(early_cmdline, CONFIG_CMDLINE,
- COMMAND_LINE_SIZE - fdt_cmdline_size);
+ strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
}
return early_cmdline;
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 4f21d970a129..92922dbd5b5c 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -171,6 +171,7 @@ void flush_thread(void)
riscv_v_vstate_off(task_pt_regs(current));
kfree(current->thread.vstate.datap);
memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+ clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
}
@@ -178,7 +179,7 @@ void arch_release_task_struct(struct task_struct *tsk)
{
/* Free the vector context of datap. */
if (has_vector())
- kfree(tsk->thread.vstate.datap);
+ riscv_v_thread_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
@@ -187,6 +188,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+ memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
+ clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
return 0;
}
@@ -221,7 +224,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
childregs->a0 = 0; /* Return value of fork() */
p->thread.s[0] = 0;
}
+ p->thread.riscv_v_flags = 0;
+ if (has_vector())
+ riscv_v_thread_alloc(p);
p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
+
+void __init arch_task_cache_init(void)
+{
+ riscv_v_setup_ctx_cache();
+}
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 2afe460de16a..e8515aa9d80b 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target,
* Ensure the vector registers have been saved to the memory before
* copying them to membuf.
*/
- if (target == current)
- riscv_v_vstate_save(current, task_pt_regs(current));
+ if (target == current) {
+ get_cpu_vector_context();
+ riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+ put_cpu_vector_context();
+ }
ptrace_vstate.vstart = vstate->vstart;
ptrace_vstate.vl = vstate->vl;
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 5a62ed1da453..e66e0999a800 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -7,6 +7,7 @@
#include <linux/bits.h>
#include <linux/init.h>
+#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/reboot.h>
#include <asm/sbi.h>
@@ -571,6 +572,66 @@ long sbi_get_mimpid(void)
}
EXPORT_SYMBOL_GPL(sbi_get_mimpid);
+bool sbi_debug_console_available;
+
+int sbi_debug_console_write(const char *bytes, unsigned int num_bytes)
+{
+ phys_addr_t base_addr;
+ struct sbiret ret;
+
+ if (!sbi_debug_console_available)
+ return -EOPNOTSUPP;
+
+ if (is_vmalloc_addr(bytes))
+ base_addr = page_to_phys(vmalloc_to_page(bytes)) +
+ offset_in_page(bytes);
+ else
+ base_addr = __pa(bytes);
+ if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes))
+ num_bytes = PAGE_SIZE - offset_in_page(bytes);
+
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+ num_bytes, lower_32_bits(base_addr),
+ upper_32_bits(base_addr), 0, 0, 0);
+ else
+ ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+ num_bytes, base_addr, 0, 0, 0, 0);
+
+ if (ret.error == SBI_ERR_FAILURE)
+ return -EIO;
+ return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
+int sbi_debug_console_read(char *bytes, unsigned int num_bytes)
+{
+ phys_addr_t base_addr;
+ struct sbiret ret;
+
+ if (!sbi_debug_console_available)
+ return -EOPNOTSUPP;
+
+ if (is_vmalloc_addr(bytes))
+ base_addr = page_to_phys(vmalloc_to_page(bytes)) +
+ offset_in_page(bytes);
+ else
+ base_addr = __pa(bytes);
+ if (PAGE_SIZE < (offset_in_page(bytes) + num_bytes))
+ num_bytes = PAGE_SIZE - offset_in_page(bytes);
+
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+ num_bytes, lower_32_bits(base_addr),
+ upper_32_bits(base_addr), 0, 0, 0);
+ else
+ ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+ num_bytes, base_addr, 0, 0, 0, 0);
+
+ if (ret.error == SBI_ERR_FAILURE)
+ return -EIO;
+ return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
void __init sbi_init(void)
{
int ret;
@@ -612,6 +673,11 @@ void __init sbi_init(void)
sbi_srst_reboot_nb.priority = 192;
register_restart_handler(&sbi_srst_reboot_nb);
}
+ if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
+ (sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
+ pr_info("SBI DBCN extension detected\n");
+ sbi_debug_console_available = true;
+ }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 535a837de55d..4f73c0ae44b2 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -26,7 +26,6 @@
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
-#include <asm/cpu_ops.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
@@ -51,7 +50,6 @@ atomic_t hart_lottery __section(".sdata")
#endif
;
unsigned long boot_cpu_hartid;
-static DEFINE_PER_CPU(struct cpu, cpu_devices);
/*
* Place kernel memory regions on the resource tree so that
@@ -299,23 +297,10 @@ void __init setup_arch(char **cmdline_p)
riscv_user_isa_enable();
}
-static int __init topology_init(void)
+bool arch_cpu_is_hotpluggable(int cpu)
{
- int i, ret;
-
- for_each_possible_cpu(i) {
- struct cpu *cpu = &per_cpu(cpu_devices, i);
-
- cpu->hotpluggable = cpu_has_hotplug(i);
- ret = register_cpu(cpu, i);
- if (unlikely(ret))
- pr_warn("Warning: %s: register_cpu %d failed (%d)\n",
- __func__, i, ret);
- }
-
- return 0;
+ return cpu_has_hotplug(cpu);
}
-subsys_initcall(topology_init);
void free_initmem(void)
{
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 88b6220b2608..501e66debf69 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -86,12 +86,15 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
/* datap is designed to be 16 byte aligned for better performance */
WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
- riscv_v_vstate_save(current, regs);
+ get_cpu_vector_context();
+ riscv_v_vstate_save(&current->thread.vstate, regs);
+ put_cpu_vector_context();
+
/* Copy everything of vstate but datap. */
err = __copy_to_user(&state->v_state, &current->thread.vstate,
offsetof(struct __riscv_v_ext_state, datap));
/* Copy the pointer datap itself. */
- err |= __put_user(datap, &state->v_state.datap);
+ err |= __put_user((__force void *)datap, &state->v_state.datap);
/* Copy the whole vector content to user space datap. */
err |= __copy_to_user(datap, current->thread.vstate.datap, riscv_v_vsize);
/* Copy magic to the user space after saving all vector context */
@@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
if (unlikely(err))
return err;
- riscv_v_vstate_restore(current, regs);
+ riscv_v_vstate_set_restore(current, regs);
return err;
}
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 40420afbb1a0..45dd4035416e 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -81,7 +81,7 @@ static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_has_hotplug(cpu))
- cpu_ops[cpu]->cpu_stop();
+ cpu_ops->cpu_stop();
#endif
for(;;)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index d162bf339beb..519b6bd946e5 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -49,7 +49,6 @@ void __init smp_prepare_boot_cpu(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int cpuid;
- int ret;
unsigned int curr_cpuid;
init_cpu_topology();
@@ -66,11 +65,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(cpuid) {
if (cpuid == curr_cpuid)
continue;
- if (cpu_ops[cpuid]->cpu_prepare) {
- ret = cpu_ops[cpuid]->cpu_prepare(cpuid);
- if (ret)
- continue;
- }
set_cpu_present(cpuid, true);
numa_store_cpu_info(cpuid);
}
@@ -125,18 +119,7 @@ static int __init acpi_parse_rintc(union acpi_subtable_headers *header, const un
static void __init acpi_parse_and_init_cpus(void)
{
- int cpuid;
-
- cpu_set_ops(0);
-
acpi_table_parse_madt(ACPI_MADT_TYPE_RINTC, acpi_parse_rintc, 0);
-
- for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) {
- if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) {
- cpu_set_ops(cpuid);
- set_cpu_possible(cpuid, true);
- }
- }
}
#else
#define acpi_parse_and_init_cpus(...) do { } while (0)
@@ -150,8 +133,6 @@ static void __init of_parse_and_init_cpus(void)
int cpuid = 1;
int rc;
- cpu_set_ops(0);
-
for_each_of_cpu_node(dn) {
rc = riscv_early_of_processor_hartid(dn, &hart);
if (rc < 0)
@@ -179,27 +160,28 @@ static void __init of_parse_and_init_cpus(void)
if (cpuid > nr_cpu_ids)
pr_warn("Total number of cpus [%d] is greater than nr_cpus option value [%d]\n",
cpuid, nr_cpu_ids);
-
- for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) {
- if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) {
- cpu_set_ops(cpuid);
- set_cpu_possible(cpuid, true);
- }
- }
}
void __init setup_smp(void)
{
+ int cpuid;
+
+ cpu_set_ops();
+
if (acpi_disabled)
of_parse_and_init_cpus();
else
acpi_parse_and_init_cpus();
+
+ for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++)
+ if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID)
+ set_cpu_possible(cpuid, true);
}
static int start_secondary_cpu(int cpu, struct task_struct *tidle)
{
- if (cpu_ops[cpu]->cpu_start)
- return cpu_ops[cpu]->cpu_start(cpu, tidle);
+ if (cpu_ops->cpu_start)
+ return cpu_ops->cpu_start(cpu, tidle);
return -EOPNOTSUPP;
}
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 3c89b8ec69c4..239509367e42 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -4,8 +4,12 @@
* Copyright (c) 2022 Ventana Micro Systems Inc.
*/
+#define pr_fmt(fmt) "suspend: " fmt
+
#include <linux/ftrace.h>
+#include <linux/suspend.h>
#include <asm/csr.h>
+#include <asm/sbi.h>
#include <asm/suspend.h>
void suspend_save_csrs(struct suspend_context *context)
@@ -85,3 +89,43 @@ int cpu_suspend(unsigned long arg,
return rc;
}
+
+#ifdef CONFIG_RISCV_SBI
+static int sbi_system_suspend(unsigned long sleep_type,
+ unsigned long resume_addr,
+ unsigned long opaque)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND,
+ sleep_type, resume_addr, opaque, 0, 0, 0);
+ if (ret.error)
+ return sbi_err_map_linux_errno(ret.error);
+
+ return ret.value;
+}
+
+static int sbi_system_suspend_enter(suspend_state_t state)
+{
+ return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend);
+}
+
+static const struct platform_suspend_ops sbi_system_suspend_ops = {
+ .valid = suspend_valid_only_mem,
+ .enter = sbi_system_suspend_enter,
+};
+
+static int __init sbi_system_suspend_init(void)
+{
+ if (sbi_spec_version >= sbi_mk_version(2, 0) &&
+ sbi_probe_extension(SBI_EXT_SUSP) > 0) {
+ pr_info("SBI SUSP extension detected\n");
+ if (IS_ENABLED(CONFIG_SUSPEND))
+ suspend_set_ops(&sbi_system_suspend_ops);
+ }
+
+ return 0;
+}
+
+arch_initcall(sbi_system_suspend_init);
+#endif /* CONFIG_RISCV_SBI */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
new file mode 100644
index 000000000000..a7c56b41efd2
--- /dev/null
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The hwprobe interface, for allowing userspace to probe to see which features
+ * are supported by the hardware. See Documentation/arch/riscv/hwprobe.rst for
+ * more details.
+ */
+#include <linux/syscalls.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/hwprobe.h>
+#include <asm/sbi.h>
+#include <asm/switch_to.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/vector.h>
+#include <vdso/vsyscall.h>
+
+
+static void hwprobe_arch_id(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ u64 id = -1ULL;
+ bool first = true;
+ int cpu;
+
+ for_each_cpu(cpu, cpus) {
+ u64 cpu_id;
+
+ switch (pair->key) {
+ case RISCV_HWPROBE_KEY_MVENDORID:
+ cpu_id = riscv_cached_mvendorid(cpu);
+ break;
+ case RISCV_HWPROBE_KEY_MIMPID:
+ cpu_id = riscv_cached_mimpid(cpu);
+ break;
+ case RISCV_HWPROBE_KEY_MARCHID:
+ cpu_id = riscv_cached_marchid(cpu);
+ break;
+ }
+
+ if (first) {
+ id = cpu_id;
+ first = false;
+ }
+
+ /*
+ * If there's a mismatch for the given set, return -1 in the
+ * value.
+ */
+ if (id != cpu_id) {
+ id = -1ULL;
+ break;
+ }
+ }
+
+ pair->value = id;
+}
+
+static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ int cpu;
+ u64 missing = 0;
+
+ pair->value = 0;
+ if (has_fpu())
+ pair->value |= RISCV_HWPROBE_IMA_FD;
+
+ if (riscv_isa_extension_available(NULL, c))
+ pair->value |= RISCV_HWPROBE_IMA_C;
+
+ if (has_vector())
+ pair->value |= RISCV_HWPROBE_IMA_V;
+
+ /*
+ * Loop through and record extensions that 1) anyone has, and 2) anyone
+ * doesn't have.
+ */
+ for_each_cpu(cpu, cpus) {
+ struct riscv_isainfo *isainfo = &hart_isa[cpu];
+
+#define EXT_KEY(ext) \
+ do { \
+ if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \
+ pair->value |= RISCV_HWPROBE_EXT_##ext; \
+ else \
+ missing |= RISCV_HWPROBE_EXT_##ext; \
+ } while (false)
+
+ /*
+ * Only use EXT_KEY() for extensions which can be exposed to userspace,
+ * regardless of the kernel's configuration, as no other checks, besides
+ * presence in the hart_isa bitmap, are made.
+ */
+ EXT_KEY(ZBA);
+ EXT_KEY(ZBB);
+ EXT_KEY(ZBS);
+ EXT_KEY(ZICBOZ);
+ EXT_KEY(ZBC);
+
+ EXT_KEY(ZBKB);
+ EXT_KEY(ZBKC);
+ EXT_KEY(ZBKX);
+ EXT_KEY(ZKND);
+ EXT_KEY(ZKNE);
+ EXT_KEY(ZKNH);
+ EXT_KEY(ZKSED);
+ EXT_KEY(ZKSH);
+ EXT_KEY(ZKT);
+ EXT_KEY(ZIHINTNTL);
+ EXT_KEY(ZTSO);
+ EXT_KEY(ZACAS);
+ EXT_KEY(ZICOND);
+
+ if (has_vector()) {
+ EXT_KEY(ZVBB);
+ EXT_KEY(ZVBC);
+ EXT_KEY(ZVKB);
+ EXT_KEY(ZVKG);
+ EXT_KEY(ZVKNED);
+ EXT_KEY(ZVKNHA);
+ EXT_KEY(ZVKNHB);
+ EXT_KEY(ZVKSED);
+ EXT_KEY(ZVKSH);
+ EXT_KEY(ZVKT);
+ EXT_KEY(ZVFH);
+ EXT_KEY(ZVFHMIN);
+ }
+
+ if (has_fpu()) {
+ EXT_KEY(ZFH);
+ EXT_KEY(ZFHMIN);
+ EXT_KEY(ZFA);
+ }
+#undef EXT_KEY
+ }
+
+ /* Now turn off reporting features if any CPU is missing it. */
+ pair->value &= ~missing;
+}
+
+static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
+{
+ struct riscv_hwprobe pair;
+
+ hwprobe_isa_ext0(&pair, cpus);
+ return (pair.value & ext);
+}
+
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+ int cpu;
+ u64 perf = -1ULL;
+
+ for_each_cpu(cpu, cpus) {
+ int this_perf = per_cpu(misaligned_access_speed, cpu);
+
+ if (perf == -1ULL)
+ perf = this_perf;
+
+ if (perf != this_perf) {
+ perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ break;
+ }
+ }
+
+ if (perf == -1ULL)
+ return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+
+ return perf;
+}
+
+static void hwprobe_one_pair(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ switch (pair->key) {
+ case RISCV_HWPROBE_KEY_MVENDORID:
+ case RISCV_HWPROBE_KEY_MARCHID:
+ case RISCV_HWPROBE_KEY_MIMPID:
+ hwprobe_arch_id(pair, cpus);
+ break;
+ /*
+ * The kernel already assumes that the base single-letter ISA
+ * extensions are supported on all harts, and only supports the
+ * IMA base, so just cheat a bit here and tell that to
+ * userspace.
+ */
+ case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
+ pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA;
+ break;
+
+ case RISCV_HWPROBE_KEY_IMA_EXT_0:
+ hwprobe_isa_ext0(pair, cpus);
+ break;
+
+ case RISCV_HWPROBE_KEY_CPUPERF_0:
+ pair->value = hwprobe_misaligned(cpus);
+ break;
+
+ case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
+ pair->value = 0;
+ if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
+ pair->value = riscv_cboz_block_size;
+ break;
+
+ /*
+ * For forward compatibility, unknown keys don't fail the whole
+ * call, but get their element key set to -1 and value set to 0
+ * indicating they're unrecognized.
+ */
+ default:
+ pair->key = -1;
+ pair->value = 0;
+ break;
+ }
+}
+
+static int hwprobe_get_values(struct riscv_hwprobe __user *pairs,
+ size_t pair_count, size_t cpusetsize,
+ unsigned long __user *cpus_user,
+ unsigned int flags)
+{
+ size_t out;
+ int ret;
+ cpumask_t cpus;
+
+ /* Check the reserved flags. */
+ if (flags != 0)
+ return -EINVAL;
+
+ /*
+ * The interface supports taking in a CPU mask, and returns values that
+ * are consistent across that mask. Allow userspace to specify NULL and
+ * 0 as a shortcut to all online CPUs.
+ */
+ cpumask_clear(&cpus);
+ if (!cpusetsize && !cpus_user) {
+ cpumask_copy(&cpus, cpu_online_mask);
+ } else {
+ if (cpusetsize > cpumask_size())
+ cpusetsize = cpumask_size();
+
+ ret = copy_from_user(&cpus, cpus_user, cpusetsize);
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * Userspace must provide at least one online CPU, without that
+ * there's no way to define what is supported.
+ */
+ cpumask_and(&cpus, &cpus, cpu_online_mask);
+ if (cpumask_empty(&cpus))
+ return -EINVAL;
+ }
+
+ for (out = 0; out < pair_count; out++, pairs++) {
+ struct riscv_hwprobe pair;
+
+ if (get_user(pair.key, &pairs->key))
+ return -EFAULT;
+
+ pair.value = 0;
+ hwprobe_one_pair(&pair, &cpus);
+ ret = put_user(pair.key, &pairs->key);
+ if (ret == 0)
+ ret = put_user(pair.value, &pairs->value);
+
+ if (ret)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs,
+ size_t pair_count, size_t cpusetsize,
+ unsigned long __user *cpus_user,
+ unsigned int flags)
+{
+ cpumask_t cpus, one_cpu;
+ bool clear_all = false;
+ size_t i;
+ int ret;
+
+ if (flags != RISCV_HWPROBE_WHICH_CPUS)
+ return -EINVAL;
+
+ if (!cpusetsize || !cpus_user)
+ return -EINVAL;
+
+ if (cpusetsize > cpumask_size())
+ cpusetsize = cpumask_size();
+
+ ret = copy_from_user(&cpus, cpus_user, cpusetsize);
+ if (ret)
+ return -EFAULT;
+
+ if (cpumask_empty(&cpus))
+ cpumask_copy(&cpus, cpu_online_mask);
+
+ cpumask_and(&cpus, &cpus, cpu_online_mask);
+
+ cpumask_clear(&one_cpu);
+
+ for (i = 0; i < pair_count; i++) {
+ struct riscv_hwprobe pair, tmp;
+ int cpu;
+
+ ret = copy_from_user(&pair, &pairs[i], sizeof(pair));
+ if (ret)
+ return -EFAULT;
+
+ if (!riscv_hwprobe_key_is_valid(pair.key)) {
+ clear_all = true;
+ pair = (struct riscv_hwprobe){ .key = -1, };
+ ret = copy_to_user(&pairs[i], &pair, sizeof(pair));
+ if (ret)
+ return -EFAULT;
+ }
+
+ if (clear_all)
+ continue;
+
+ tmp = (struct riscv_hwprobe){ .key = pair.key, };
+
+ for_each_cpu(cpu, &cpus) {
+ cpumask_set_cpu(cpu, &one_cpu);
+
+ hwprobe_one_pair(&tmp, &one_cpu);
+
+ if (!riscv_hwprobe_pair_cmp(&tmp, &pair))
+ cpumask_clear_cpu(cpu, &cpus);
+
+ cpumask_clear_cpu(cpu, &one_cpu);
+ }
+ }
+
+ if (clear_all)
+ cpumask_clear(&cpus);
+
+ ret = copy_to_user(cpus_user, &cpus, cpusetsize);
+ if (ret)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
+ size_t pair_count, size_t cpusetsize,
+ unsigned long __user *cpus_user,
+ unsigned int flags)
+{
+ if (flags & RISCV_HWPROBE_WHICH_CPUS)
+ return hwprobe_get_cpus(pairs, pair_count, cpusetsize,
+ cpus_user, flags);
+
+ return hwprobe_get_values(pairs, pair_count, cpusetsize,
+ cpus_user, flags);
+}
+
+#ifdef CONFIG_MMU
+
+static int __init init_hwprobe_vdso_data(void)
+{
+ struct vdso_data *vd = __arch_get_k_vdso_data();
+ struct arch_vdso_data *avd = &vd->arch_data;
+ u64 id_bitsmash = 0;
+ struct riscv_hwprobe pair;
+ int key;
+
+ /*
+ * Initialize vDSO data with the answers for the "all CPUs" case, to
+ * save a syscall in the common case.
+ */
+ for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) {
+ pair.key = key;
+ hwprobe_one_pair(&pair, cpu_online_mask);
+
+ WARN_ON_ONCE(pair.key < 0);
+
+ avd->all_cpu_hwprobe_values[key] = pair.value;
+ /*
+ * Smash together the vendor, arch, and impl IDs to see if
+ * they're all 0 or any negative.
+ */
+ if (key <= RISCV_HWPROBE_KEY_MIMPID)
+ id_bitsmash |= pair.value;
+ }
+
+ /*
+ * If the arch, vendor, and implementation ID are all the same across
+ * all harts, then assume all CPUs are the same, and allow the vDSO to
+ * answer queries for arbitrary masks. However if all values are 0 (not
+ * populated) or any value returns -1 (varies across CPUs), then the
+ * vDSO should defer to the kernel for exotic cpu masks.
+ */
+ avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
+ return 0;
+}
+
+arch_initcall_sync(init_hwprobe_vdso_data);
+
+#endif /* CONFIG_MMU */
+
+SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
+ size_t, pair_count, size_t, cpusetsize, unsigned long __user *,
+ cpus, unsigned int, flags)
+{
+ return do_riscv_hwprobe(pairs, pair_count, cpusetsize,
+ cpus, flags);
+}
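
A hedged userspace sketch of the syscall implemented by this new file, querying the common IMA_EXT_0 bitmap across all online CPUs (cpusetsize == 0 and cpus == NULL is the documented shortcut); it assumes the uapi <asm/hwprobe.h> header and the __NR_riscv_hwprobe syscall number are installed on the build host:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

int main(void)
{
	struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_IMA_EXT_0 };

	/* One pair, all online CPUs, no flags. */
	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
		return 1;

	printf("IMA_EXT_0: 0x%llx\n", (unsigned long long)pair.value);
	return 0;
}
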
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index a2ca5b7756a5..f1c1416a9f1e 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -7,15 +7,7 @@
#include <linux/syscalls.h>
#include <asm/cacheflush.h>
-#include <asm/cpufeature.h>
-#include <asm/hwprobe.h>
-#include <asm/sbi.h>
-#include <asm/vector.h>
-#include <asm/switch_to.h>
-#include <asm/uaccess.h>
-#include <asm/unistd.h>
#include <asm-generic/mman-common.h>
-#include <vdso/vsyscall.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -77,283 +69,6 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
return 0;
}
-/*
- * The hwprobe interface, for allowing userspace to probe to see which features
- * are supported by the hardware. See Documentation/arch/riscv/hwprobe.rst for more
- * details.
- */
-static void hwprobe_arch_id(struct riscv_hwprobe *pair,
- const struct cpumask *cpus)
-{
- u64 id = -1ULL;
- bool first = true;
- int cpu;
-
- for_each_cpu(cpu, cpus) {
- u64 cpu_id;
-
- switch (pair->key) {
- case RISCV_HWPROBE_KEY_MVENDORID:
- cpu_id = riscv_cached_mvendorid(cpu);
- break;
- case RISCV_HWPROBE_KEY_MIMPID:
- cpu_id = riscv_cached_mimpid(cpu);
- break;
- case RISCV_HWPROBE_KEY_MARCHID:
- cpu_id = riscv_cached_marchid(cpu);
- break;
- }
-
- if (first) {
- id = cpu_id;
- first = false;
- }
-
- /*
- * If there's a mismatch for the given set, return -1 in the
- * value.
- */
- if (id != cpu_id) {
- id = -1ULL;
- break;
- }
- }
-
- pair->value = id;
-}
-
-static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
- const struct cpumask *cpus)
-{
- int cpu;
- u64 missing = 0;
-
- pair->value = 0;
- if (has_fpu())
- pair->value |= RISCV_HWPROBE_IMA_FD;
-
- if (riscv_isa_extension_available(NULL, c))
- pair->value |= RISCV_HWPROBE_IMA_C;
-
- if (has_vector())
- pair->value |= RISCV_HWPROBE_IMA_V;
-
- /*
- * Loop through and record extensions that 1) anyone has, and 2) anyone
- * doesn't have.
- */
- for_each_cpu(cpu, cpus) {
- struct riscv_isainfo *isainfo = &hart_isa[cpu];
-
-#define EXT_KEY(ext) \
- do { \
- if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \
- pair->value |= RISCV_HWPROBE_EXT_##ext; \
- else \
- missing |= RISCV_HWPROBE_EXT_##ext; \
- } while (false)
-
- /*
- * Only use EXT_KEY() for extensions which can be exposed to userspace,
- * regardless of the kernel's configuration, as no other checks, besides
- * presence in the hart_isa bitmap, are made.
- */
- EXT_KEY(ZBA);
- EXT_KEY(ZBB);
- EXT_KEY(ZBS);
- EXT_KEY(ZICBOZ);
-#undef EXT_KEY
- }
-
- /* Now turn off reporting features if any CPU is missing it. */
- pair->value &= ~missing;
-}
-
-static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext)
-{
- struct riscv_hwprobe pair;
-
- hwprobe_isa_ext0(&pair, cpus);
- return (pair.value & ext);
-}
-
-static u64 hwprobe_misaligned(const struct cpumask *cpus)
-{
- int cpu;
- u64 perf = -1ULL;
-
- for_each_cpu(cpu, cpus) {
- int this_perf = per_cpu(misaligned_access_speed, cpu);
-
- if (perf == -1ULL)
- perf = this_perf;
-
- if (perf != this_perf) {
- perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
- break;
- }
- }
-
- if (perf == -1ULL)
- return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
-
- return perf;
-}
-
-static void hwprobe_one_pair(struct riscv_hwprobe *pair,
- const struct cpumask *cpus)
-{
- switch (pair->key) {
- case RISCV_HWPROBE_KEY_MVENDORID:
- case RISCV_HWPROBE_KEY_MARCHID:
- case RISCV_HWPROBE_KEY_MIMPID:
- hwprobe_arch_id(pair, cpus);
- break;
- /*
- * The kernel already assumes that the base single-letter ISA
- * extensions are supported on all harts, and only supports the
- * IMA base, so just cheat a bit here and tell that to
- * userspace.
- */
- case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
- pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA;
- break;
-
- case RISCV_HWPROBE_KEY_IMA_EXT_0:
- hwprobe_isa_ext0(pair, cpus);
- break;
-
- case RISCV_HWPROBE_KEY_CPUPERF_0:
- pair->value = hwprobe_misaligned(cpus);
- break;
-
- case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
- pair->value = 0;
- if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
- pair->value = riscv_cboz_block_size;
- break;
-
- /*
- * For forward compatibility, unknown keys don't fail the whole
- * call, but get their element key set to -1 and value set to 0
- * indicating they're unrecognized.
- */
- default:
- pair->key = -1;
- pair->value = 0;
- break;
- }
-}
-
-static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
- size_t pair_count, size_t cpu_count,
- unsigned long __user *cpus_user,
- unsigned int flags)
-{
- size_t out;
- int ret;
- cpumask_t cpus;
-
- /* Check the reserved flags. */
- if (flags != 0)
- return -EINVAL;
-
- /*
- * The interface supports taking in a CPU mask, and returns values that
- * are consistent across that mask. Allow userspace to specify NULL and
- * 0 as a shortcut to all online CPUs.
- */
- cpumask_clear(&cpus);
- if (!cpu_count && !cpus_user) {
- cpumask_copy(&cpus, cpu_online_mask);
- } else {
- if (cpu_count > cpumask_size())
- cpu_count = cpumask_size();
-
- ret = copy_from_user(&cpus, cpus_user, cpu_count);
- if (ret)
- return -EFAULT;
-
- /*
- * Userspace must provide at least one online CPU, without that
- * there's no way to define what is supported.
- */
- cpumask_and(&cpus, &cpus, cpu_online_mask);
- if (cpumask_empty(&cpus))
- return -EINVAL;
- }
-
- for (out = 0; out < pair_count; out++, pairs++) {
- struct riscv_hwprobe pair;
-
- if (get_user(pair.key, &pairs->key))
- return -EFAULT;
-
- pair.value = 0;
- hwprobe_one_pair(&pair, &cpus);
- ret = put_user(pair.key, &pairs->key);
- if (ret == 0)
- ret = put_user(pair.value, &pairs->value);
-
- if (ret)
- return -EFAULT;
- }
-
- return 0;
-}
-
-#ifdef CONFIG_MMU
-
-static int __init init_hwprobe_vdso_data(void)
-{
- struct vdso_data *vd = __arch_get_k_vdso_data();
- struct arch_vdso_data *avd = &vd->arch_data;
- u64 id_bitsmash = 0;
- struct riscv_hwprobe pair;
- int key;
-
- /*
- * Initialize vDSO data with the answers for the "all CPUs" case, to
- * save a syscall in the common case.
- */
- for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) {
- pair.key = key;
- hwprobe_one_pair(&pair, cpu_online_mask);
-
- WARN_ON_ONCE(pair.key < 0);
-
- avd->all_cpu_hwprobe_values[key] = pair.value;
- /*
- * Smash together the vendor, arch, and impl IDs to see if
- * they're all 0 or any negative.
- */
- if (key <= RISCV_HWPROBE_KEY_MIMPID)
- id_bitsmash |= pair.value;
- }
-
- /*
- * If the arch, vendor, and implementation ID are all the same across
- * all harts, then assume all CPUs are the same, and allow the vDSO to
- * answer queries for arbitrary masks. However if all values are 0 (not
- * populated) or any value returns -1 (varies across CPUs), then the
- * vDSO should defer to the kernel for exotic cpu masks.
- */
- avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
- return 0;
-}
-
-arch_initcall_sync(init_hwprobe_vdso_data);
-
-#endif /* CONFIG_MMU */
-
-SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
- size_t, pair_count, size_t, cpu_count, unsigned long __user *,
- cpus, unsigned int, flags)
-{
- return do_riscv_hwprobe(pairs, pair_count, cpu_count,
- cpus, flags);
-}
-
/* Not defined using SYSCALL_DEFINE0 to avoid error injection */
asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *__unused)
{
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 23641e82a9df..ba3477197789 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -12,6 +12,7 @@
#include <asm/sbi.h>
#include <asm/processor.h>
#include <asm/timex.h>
+#include <asm/paravirt.h>
unsigned long riscv_timebase __ro_after_init;
EXPORT_SYMBOL_GPL(riscv_timebase);
@@ -45,4 +46,6 @@ void __init time_init(void)
timer_probe();
tick_setup_hrtimer_broadcast();
+
+ pv_time_init();
}
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 5255f8134aef..8ded225e8c5b 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -319,7 +319,7 @@ static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn)
static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
{
if (user_mode(regs)) {
- return __get_user(*r_val, addr);
+ return __get_user(*r_val, (u8 __user *)addr);
} else {
*r_val = *addr;
return 0;
@@ -329,7 +329,7 @@ static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
{
if (user_mode(regs)) {
- return __put_user(val, addr);
+ return __put_user(val, (u8 __user *)addr);
} else {
*addr = val;
return 0;
@@ -343,7 +343,7 @@ static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
if (user_mode(regs)) { \
__ret = __get_user(insn, insn_addr); \
} else { \
- insn = *insn_addr; \
+ insn = *(__force u16 *)insn_addr; \
__ret = 0; \
} \
\
diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
index cadf725ef798..1e926e4b5881 100644
--- a/arch/riscv/kernel/vdso/hwprobe.c
+++ b/arch/riscv/kernel/vdso/hwprobe.c
@@ -3,26 +3,22 @@
* Copyright 2023 Rivos, Inc
*/
+#include <linux/string.h>
#include <linux/types.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
- size_t cpu_count, unsigned long *cpus,
+ size_t cpusetsize, unsigned long *cpus,
unsigned int flags);
-/* Add a prototype to avoid -Wmissing-prototypes warning. */
-int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
- size_t cpu_count, unsigned long *cpus,
- unsigned int flags);
-
-int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
- size_t cpu_count, unsigned long *cpus,
- unsigned int flags)
+static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpusetsize, unsigned long *cpus,
+ unsigned int flags)
{
const struct vdso_data *vd = __arch_get_vdso_data();
const struct arch_vdso_data *avd = &vd->arch_data;
- bool all_cpus = !cpu_count && !cpus;
+ bool all_cpus = !cpusetsize && !cpus;
struct riscv_hwprobe *p = pairs;
struct riscv_hwprobe *end = pairs + pair_count;
@@ -33,7 +29,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
* masks.
*/
if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus))
- return riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags);
+ return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
/* This is something we can handle, fill out the pairs. */
while (p < end) {
@@ -50,3 +46,71 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
return 0;
}
+
+static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpusetsize, unsigned long *cpus,
+ unsigned int flags)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ const struct arch_vdso_data *avd = &vd->arch_data;
+ struct riscv_hwprobe *p = pairs;
+ struct riscv_hwprobe *end = pairs + pair_count;
+ unsigned char *c = (unsigned char *)cpus;
+ bool empty_cpus = true;
+ bool clear_all = false;
+ int i;
+
+ if (!cpusetsize || !cpus)
+ return -EINVAL;
+
+ for (i = 0; i < cpusetsize; i++) {
+ if (c[i]) {
+ empty_cpus = false;
+ break;
+ }
+ }
+
+ if (empty_cpus || flags != RISCV_HWPROBE_WHICH_CPUS || !avd->homogeneous_cpus)
+ return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags);
+
+ while (p < end) {
+ if (riscv_hwprobe_key_is_valid(p->key)) {
+ struct riscv_hwprobe t = {
+ .key = p->key,
+ .value = avd->all_cpu_hwprobe_values[p->key],
+ };
+
+ if (!riscv_hwprobe_pair_cmp(&t, p))
+ clear_all = true;
+ } else {
+ clear_all = true;
+ p->key = -1;
+ p->value = 0;
+ }
+ p++;
+ }
+
+ if (clear_all) {
+ for (i = 0; i < cpusetsize; i++)
+ c[i] = 0;
+ }
+
+ return 0;
+}
+
+/* Add a prototype to avoid -Wmissing-prototypes warning. */
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpusetsize, unsigned long *cpus,
+ unsigned int flags);
+
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpusetsize, unsigned long *cpus,
+ unsigned int flags)
+{
+ if (flags & RISCV_HWPROBE_WHICH_CPUS)
+ return riscv_vdso_get_cpus(pairs, pair_count, cpusetsize,
+ cpus, flags);
+
+ return riscv_vdso_get_values(pairs, pair_count, cpusetsize,
+ cpus, flags);
+}
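
With RISCV_HWPROBE_WHICH_CPUS the pairs act as filters and the cpu set is both input and output: on return it holds only the harts whose answers match every pair, and an empty input set means "start from all online CPUs" (see hwprobe_get_cpus() in sys_hwprobe.c above). A hedged userspace sketch, where harts_with_zba_zbb() is a hypothetical helper:

#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/hwprobe.h>

static int harts_with_zba_zbb(cpu_set_t *cpus)
{
	struct riscv_hwprobe pair = {
		.key = RISCV_HWPROBE_KEY_IMA_EXT_0,
		.value = RISCV_HWPROBE_EXT_ZBA | RISCV_HWPROBE_EXT_ZBB,
	};

	CPU_ZERO(cpus);		/* empty set: start from all online CPUs */
	return syscall(__NR_riscv_hwprobe, &pair, 1, sizeof(*cpus),
		       (unsigned long *)cpus, RISCV_HWPROBE_WHICH_CPUS);
}
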
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 578b6292487e..6727d1d3b8f2 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -21,6 +21,10 @@
#include <asm/bug.h>
static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
+static struct kmem_cache *riscv_v_user_cachep;
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static struct kmem_cache *riscv_v_kernel_cachep;
+#endif
unsigned long riscv_v_vsize __read_mostly;
EXPORT_SYMBOL_GPL(riscv_v_vsize);
@@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void)
return 0;
}
+void __init riscv_v_setup_ctx_cache(void)
+{
+ if (!has_vector())
+ return;
+
+ riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
+ riscv_v_vsize, 16, SLAB_PANIC,
+ 0, riscv_v_vsize, NULL);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
+ riscv_v_vsize, 16,
+ SLAB_PANIC, NULL);
+#endif
+}
+
static bool insn_is_vector(u32 insn_buf)
{
u32 opcode = insn_buf & __INSN_OPCODE_MASK;
@@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf)
return false;
}
-static int riscv_v_thread_zalloc(void)
+static int riscv_v_thread_zalloc(struct kmem_cache *cache,
+ struct __riscv_v_ext_state *ctx)
{
void *datap;
- datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
+ datap = kmem_cache_zalloc(cache, GFP_KERNEL);
if (!datap)
return -ENOMEM;
- current->thread.vstate.datap = datap;
- memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
- datap));
+ ctx->datap = datap;
+ memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
return 0;
}
+void riscv_v_thread_alloc(struct task_struct *tsk)
+{
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
+#endif
+}
+
+void riscv_v_thread_free(struct task_struct *tsk)
+{
+ if (tsk->thread.vstate.datap)
+ kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ if (tsk->thread.kernel_vstate.datap)
+ kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
+#endif
+}
+
#define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
#define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
#define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
@@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
if (inherit)
ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
- tsk->thread.vstate_ctrl = ctrl;
+ tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK;
+ tsk->thread.vstate_ctrl |= ctrl;
}
bool riscv_v_vstate_ctrl_user_allowed(void)
@@ -162,12 +199,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
* context where VS has been off. So, try to allocate the user's V
* context and resume execution.
*/
- if (riscv_v_thread_zalloc()) {
+ if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
force_sig(SIGBUS);
return true;
}
riscv_v_vstate_on(regs);
- riscv_v_vstate_restore(current, regs);
+ riscv_v_vstate_set_restore(current, regs);
return true;
}
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index 50767647fbc6..8c3daa1b0531 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -29,10 +29,12 @@ SECTIONS
HEAD_TEXT_SECTION
INIT_TEXT_SECTION(PAGE_SIZE)
/* we have to discard exit text and such at runtime, not link time */
+ __exittext_begin = .;
.exit.text :
{
EXIT_TEXT
}
+ __exittext_end = .;
.text : {
_text = .;
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 492dd4b8f3d6..002ca58dd998 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -69,10 +69,12 @@ SECTIONS
__soc_builtin_dtb_table_end = .;
}
/* we have to discard exit text and such at runtime, not link time */
+ __exittext_begin = .;
.exit.text :
{
EXIT_TEXT
}
+ __exittext_end = .;
__init_text_end = .;
. = ALIGN(SECTION_ALIGN);
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index dfc237d7875b..d490db943858 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -20,18 +20,17 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
depends on RISCV_SBI && MMU
- select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select KVM_COMMON
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_MMIO
select KVM_XFER_TO_GUEST_WORK
- select MMU_NOTIFIER
- select PREEMPT_NOTIFIERS
+ select KVM_GENERIC_MMU_NOTIFIER
+ select SCHED_INFO
help
Support hosting virtualized guest machines.
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 4c2067fc59fc..c9646521f113 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -26,6 +26,7 @@ kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
kvm-y += vcpu_sbi_base.o
kvm-y += vcpu_sbi_replace.o
kvm-y += vcpu_sbi_hsm.o
+kvm-y += vcpu_sbi_sta.o
kvm-y += vcpu_timer.o
kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
kvm-y += aia.o
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 068c74593871..a9e2fd7245e1 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -103,7 +103,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
*ptep_level = current_level;
ptep = (pte_t *)kvm->arch.pgd;
ptep = &ptep[gstage_pte_index(addr, current_level)];
- while (ptep && pte_val(*ptep)) {
+ while (ptep && pte_val(ptep_get(ptep))) {
if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
*ptepp = ptep;
@@ -113,7 +113,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
if (current_level) {
current_level--;
*ptep_level = current_level;
- ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
ptep = &ptep[gstage_pte_index(addr, current_level)];
} else {
ptep = NULL;
@@ -149,25 +149,25 @@ static int gstage_set_pte(struct kvm *kvm, u32 level,
if (gstage_pte_leaf(ptep))
return -EEXIST;
- if (!pte_val(*ptep)) {
+ if (!pte_val(ptep_get(ptep))) {
if (!pcache)
return -ENOMEM;
next_ptep = kvm_mmu_memory_cache_alloc(pcache);
if (!next_ptep)
return -ENOMEM;
- *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
- __pgprot(_PAGE_TABLE));
+ set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
+ __pgprot(_PAGE_TABLE)));
} else {
if (gstage_pte_leaf(ptep))
return -EEXIST;
- next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
}
current_level--;
ptep = &next_ptep[gstage_pte_index(addr, current_level)];
}
- *ptep = *new_pte;
+ set_pte(ptep, *new_pte);
if (gstage_pte_leaf(ptep))
gstage_remote_tlb_flush(kvm, current_level, addr);
@@ -239,11 +239,11 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
BUG_ON(addr & (page_size - 1));
- if (!pte_val(*ptep))
+ if (!pte_val(ptep_get(ptep)))
return;
if (ptep_level && !gstage_pte_leaf(ptep)) {
- next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
next_ptep_level = ptep_level - 1;
ret = gstage_level_to_page_size(next_ptep_level,
&next_page_size);
@@ -261,7 +261,7 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
if (op == GSTAGE_OP_CLEAR)
set_pte(ptep, __pte(0));
else if (op == GSTAGE_OP_WP)
- set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
+ set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE));
gstage_remote_tlb_flush(kvm, ptep_level, addr);
}
}
@@ -603,7 +603,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
&ptep, &ptep_level))
return false;
- return pte_young(*ptep);
+ return pte_young(ptep_get(ptep));
}
int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e087c809073c..b5ca9f2e98ac 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -83,6 +83,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.hfence_tail = 0;
memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
+ kvm_riscv_vcpu_sbi_sta_reset(vcpu);
+
/* Reset the guest CSRs for hotplug usecase */
if (loaded)
kvm_arch_vcpu_load(vcpu, smp_processor_id());
@@ -541,6 +543,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_riscv_vcpu_aia_load(vcpu, cpu);
+ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+
vcpu->cpu = cpu;
}
@@ -614,6 +618,9 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
kvm_riscv_hfence_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
+ kvm_riscv_vcpu_record_steal_time(vcpu);
}
}
@@ -757,8 +764,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Update HVIP CSR for current CPU */
kvm_riscv_update_hvip(vcpu);
- if (ret <= 0 ||
- kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
+ if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
kvm_request_pending(vcpu) ||
xfer_to_guest_mode_work_pending()) {
vcpu->mode = OUTSIDE_GUEST_MODE;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index f8c9fa0c03c5..fc34557f5356 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -485,7 +485,7 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
rc = kvm_riscv_vcpu_smstateen_set_csr(vcpu, reg_num,
reg_val);
-break;
+ break;
default:
rc = -ENOENT;
break;
@@ -931,50 +931,106 @@ static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
return copy_isa_ext_reg_indices(vcpu, NULL);
}
-static inline unsigned long num_sbi_ext_regs(void)
+static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
- /*
- * number of KVM_REG_RISCV_SBI_SINGLE +
- * 2 x (number of KVM_REG_RISCV_SBI_MULTI)
- */
- return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1);
-}
-
-static int copy_sbi_ext_reg_indices(u64 __user *uindices)
-{
- int n;
+ unsigned int n = 0;
- /* copy KVM_REG_RISCV_SBI_SINGLE */
- n = KVM_RISCV_SBI_EXT_MAX;
- for (int i = 0; i < n; i++) {
+ for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) {
u64 size = IS_ENABLED(CONFIG_32BIT) ?
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
KVM_REG_RISCV_SBI_SINGLE | i;
+ if (!riscv_vcpu_supports_sbi_ext(vcpu, i))
+ continue;
+
if (uindices) {
if (put_user(reg, uindices))
return -EFAULT;
uindices++;
}
+
+ n++;
}
- /* copy KVM_REG_RISCV_SBI_MULTI */
- n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
- for (int i = 0; i < n; i++) {
- u64 size = IS_ENABLED(CONFIG_32BIT) ?
- KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
- u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
- KVM_REG_RISCV_SBI_MULTI_EN | i;
+ return n;
+}
+
+static unsigned long num_sbi_ext_regs(struct kvm_vcpu *vcpu)
+{
+ return copy_sbi_ext_reg_indices(vcpu, NULL);
+}
+
+static int copy_sbi_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ int total = 0;
+
+ if (scontext->ext_status[KVM_RISCV_SBI_EXT_STA] == KVM_RISCV_SBI_EXT_STATUS_ENABLED) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ int n = sizeof(struct kvm_riscv_sbi_sta) / sizeof(unsigned long);
+
+ for (int i = 0; i < n; i++) {
+ u64 reg = KVM_REG_RISCV | size |
+ KVM_REG_RISCV_SBI_STATE |
+ KVM_REG_RISCV_SBI_STA | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ total += n;
+ }
+
+ return total;
+}
+
+static inline unsigned long num_sbi_regs(struct kvm_vcpu *vcpu)
+{
+ return copy_sbi_reg_indices(vcpu, NULL);
+}
+
+static inline unsigned long num_vector_regs(const struct kvm_vcpu *vcpu)
+{
+ if (!riscv_isa_extension_available(vcpu->arch.isa, v))
+ return 0;
+
+ /* vstart, vl, vtype, vcsr, vlenb and 32 vector regs */
+ return 37;
+}
+
+static int copy_vector_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ int n = num_vector_regs(vcpu);
+ u64 reg, size;
+ int i;
+
+ if (n == 0)
+ return 0;
+
+ /* copy vstart, vl, vtype, vcsr and vlenb */
+ size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ for (i = 0; i < 5; i++) {
+ reg = KVM_REG_RISCV | size | KVM_REG_RISCV_VECTOR | i;
if (uindices) {
if (put_user(reg, uindices))
return -EFAULT;
uindices++;
}
+ }
- reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
- KVM_REG_RISCV_SBI_MULTI_DIS | i;
+ /* vector_regs have a variable 'vlenb' size */
+ size = __builtin_ctzl(cntx->vector.vlenb);
+ size <<= KVM_REG_SIZE_SHIFT;
+ for (i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size |
+ KVM_REG_RISCV_VECTOR_REG(i);
if (uindices) {
if (put_user(reg, uindices))
@@ -983,7 +1039,7 @@ static int copy_sbi_ext_reg_indices(u64 __user *uindices)
}
}
- return num_sbi_ext_regs();
+ return n;
}
/*
@@ -1001,8 +1057,10 @@ unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu)
res += num_timer_regs();
res += num_fp_f_regs(vcpu);
res += num_fp_d_regs(vcpu);
+ res += num_vector_regs(vcpu);
res += num_isa_ext_regs(vcpu);
- res += num_sbi_ext_regs();
+ res += num_sbi_ext_regs(vcpu);
+ res += num_sbi_regs(vcpu);
return res;
}
@@ -1045,14 +1103,25 @@ int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
return ret;
uindices += ret;
+ ret = copy_vector_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
ret = copy_isa_ext_reg_indices(vcpu, uindices);
if (ret < 0)
return ret;
uindices += ret;
- ret = copy_sbi_ext_reg_indices(uindices);
+ ret = copy_sbi_ext_reg_indices(vcpu, uindices);
if (ret < 0)
return ret;
+ uindices += ret;
+
+ ret = copy_sbi_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
return 0;
}
@@ -1075,12 +1144,14 @@ int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
case KVM_REG_RISCV_FP_D:
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
case KVM_REG_RISCV_ISA_EXT:
return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
- case KVM_REG_RISCV_VECTOR:
- return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
+ case KVM_REG_RISCV_SBI_STATE:
+ return kvm_riscv_vcpu_set_reg_sbi(vcpu, reg);
default:
break;
}
@@ -1106,12 +1177,14 @@ int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
case KVM_REG_RISCV_FP_D:
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
case KVM_REG_RISCV_ISA_EXT:
return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
case KVM_REG_RISCV_SBI_EXT:
return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
- case KVM_REG_RISCV_VECTOR:
- return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
+ case KVM_REG_RISCV_SBI_STATE:
+ return kvm_riscv_vcpu_get_reg_sbi(vcpu, reg);
default:
break;
}
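
The variable-size encoding used above for vector registers (the reg ID's size field carries log2 of vlenb) also has to be reproduced by userspace when addressing KVM_REG_RISCV_VECTOR_REG(n) through KVM_GET_ONE_REG/KVM_SET_ONE_REG. A hedged sketch of such a helper; riscv_vreg_id() is hypothetical and assumes the uapi <asm/kvm.h> definitions:

#include <linux/kvm.h>
#include <asm/kvm.h>

/* Build the one-reg ID for vector register n; vlenb is read from the
 * vcpu's KVM_REG_RISCV_VECTOR_CSR_REG(vlenb) register beforehand.
 */
static __u64 riscv_vreg_id(unsigned long vlenb, int n)
{
	__u64 size = (__u64)__builtin_ctzl(vlenb) << KVM_REG_SIZE_SHIFT;

	return KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size |
	       KVM_REG_RISCV_VECTOR_REG(n);
}
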
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index a04ff98085d9..72a2ffb8dcd1 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
.ext_ptr = &vcpu_sbi_ext_dbcn,
},
{
+ .ext_idx = KVM_RISCV_SBI_EXT_STA,
+ .ext_ptr = &vcpu_sbi_ext_sta,
+ },
+ {
.ext_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL,
.ext_ptr = &vcpu_sbi_ext_experimental,
},
@@ -80,6 +84,34 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
},
};
+static const struct kvm_riscv_sbi_extension_entry *
+riscv_vcpu_get_sbi_ext(struct kvm_vcpu *vcpu, unsigned long idx)
+{
+ const struct kvm_riscv_sbi_extension_entry *sext = NULL;
+
+ if (idx >= KVM_RISCV_SBI_EXT_MAX)
+ return NULL;
+
+ for (int i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ if (sbi_ext[i].ext_idx == idx) {
+ sext = &sbi_ext[i];
+ break;
+ }
+ }
+
+ return sext;
+}
+
+bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx)
+{
+ struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ const struct kvm_riscv_sbi_extension_entry *sext;
+
+ sext = riscv_vcpu_get_sbi_ext(vcpu, idx);
+
+ return sext && scontext->ext_status[sext->ext_idx] != KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
+}
+
void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -140,28 +172,19 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long reg_val)
{
- unsigned long i;
- const struct kvm_riscv_sbi_extension_entry *sext = NULL;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
-
- if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
- return -ENOENT;
+ const struct kvm_riscv_sbi_extension_entry *sext;
if (reg_val != 1 && reg_val != 0)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
- if (sbi_ext[i].ext_idx == reg_num) {
- sext = &sbi_ext[i];
- break;
- }
- }
- if (!sext)
+ sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num);
+ if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE)
return -ENOENT;
scontext->ext_status[sext->ext_idx] = (reg_val) ?
- KVM_RISCV_SBI_EXT_AVAILABLE :
- KVM_RISCV_SBI_EXT_UNAVAILABLE;
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED :
+ KVM_RISCV_SBI_EXT_STATUS_DISABLED;
return 0;
}
@@ -170,24 +193,16 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
unsigned long reg_num,
unsigned long *reg_val)
{
- unsigned long i;
- const struct kvm_riscv_sbi_extension_entry *sext = NULL;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
+ const struct kvm_riscv_sbi_extension_entry *sext;
- if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
- return -ENOENT;
-
- for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
- if (sbi_ext[i].ext_idx == reg_num) {
- sext = &sbi_ext[i];
- break;
- }
- }
- if (!sext)
+ sext = riscv_vcpu_get_sbi_ext(vcpu, reg_num);
+ if (!sext || scontext->ext_status[sext->ext_idx] == KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE)
return -ENOENT;
*reg_val = scontext->ext_status[sext->ext_idx] ==
- KVM_RISCV_SBI_EXT_AVAILABLE;
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED;
+
return 0;
}
@@ -310,6 +325,69 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_riscv_vcpu_set_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_SBI_STATE);
+ unsigned long reg_subtype, reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_STA:
+ return kvm_riscv_vcpu_set_reg_sbi_sta(vcpu, reg_num, reg_val);
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_get_reg_sbi(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_SBI_STATE);
+ unsigned long reg_subtype, reg_val;
+ int ret;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_STA:
+ ret = kvm_riscv_vcpu_get_reg_sbi_sta(vcpu, reg_num, &reg_val);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
struct kvm_vcpu *vcpu, unsigned long extid)
{
@@ -325,7 +403,7 @@ const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(
if (ext->extid_start <= extid && ext->extid_end >= extid) {
if (entry->ext_idx >= KVM_RISCV_SBI_EXT_MAX ||
scontext->ext_status[entry->ext_idx] ==
- KVM_RISCV_SBI_EXT_AVAILABLE)
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED)
return ext;
return NULL;
@@ -413,12 +491,12 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu)
if (ext->probe && !ext->probe(vcpu)) {
scontext->ext_status[entry->ext_idx] =
- KVM_RISCV_SBI_EXT_UNAVAILABLE;
+ KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
continue;
}
- scontext->ext_status[entry->ext_idx] = ext->default_unavail ?
- KVM_RISCV_SBI_EXT_UNAVAILABLE :
- KVM_RISCV_SBI_EXT_AVAILABLE;
+ scontext->ext_status[entry->ext_idx] = ext->default_disabled ?
+ KVM_RISCV_SBI_EXT_STATUS_DISABLED :
+ KVM_RISCV_SBI_EXT_STATUS_ENABLED;
}
}
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 23b57c931b15..9c2ab3dfa93a 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -204,6 +204,6 @@ static int kvm_sbi_ext_dbcn_handler(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_dbcn = {
.extid_start = SBI_EXT_DBCN,
.extid_end = SBI_EXT_DBCN,
- .default_unavail = true,
+ .default_disabled = true,
.handler = kvm_sbi_ext_dbcn_handler,
};
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
new file mode 100644
index 000000000000..01f09fe8c3b0
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ */
+
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/sizes.h>
+
+#include <asm/bug.h>
+#include <asm/current.h>
+#include <asm/kvm_vcpu_sbi.h>
+#include <asm/page.h>
+#include <asm/sbi.h>
+#include <asm/uaccess.h>
+
+void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.sta.shmem = INVALID_GPA;
+ vcpu->arch.sta.last_steal = 0;
+}
+
+void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
+{
+ gpa_t shmem = vcpu->arch.sta.shmem;
+ u64 last_steal = vcpu->arch.sta.last_steal;
+ u32 *sequence_ptr, sequence;
+ u64 *steal_ptr, steal;
+ unsigned long hva;
+ gfn_t gfn;
+
+ if (shmem == INVALID_GPA)
+ return;
+
+ /*
+ * shmem is 64-byte aligned (see the enforcement in
+ * kvm_sbi_sta_steal_time_set_shmem()) and the size of sbi_sta_struct
+ * is 64 bytes, so we know all its offsets are in the same page.
+ */
+ gfn = shmem >> PAGE_SHIFT;
+ hva = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+
+ if (WARN_ON(kvm_is_error_hva(hva))) {
+ vcpu->arch.sta.shmem = INVALID_GPA;
+ return;
+ }
+
+ sequence_ptr = (u32 *)(hva + offset_in_page(shmem) +
+ offsetof(struct sbi_sta_struct, sequence));
+ steal_ptr = (u64 *)(hva + offset_in_page(shmem) +
+ offsetof(struct sbi_sta_struct, steal));
+
+ if (WARN_ON(get_user(sequence, sequence_ptr)))
+ return;
+
+ sequence = le32_to_cpu(sequence);
+ sequence += 1;
+
+ if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)))
+ return;
+
+ if (!WARN_ON(get_user(steal, steal_ptr))) {
+ steal = le64_to_cpu(steal);
+ vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay);
+ steal += vcpu->arch.sta.last_steal - last_steal;
+ WARN_ON(put_user(cpu_to_le64(steal), steal_ptr));
+ }
+
+ sequence += 1;
+ WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr));
+
+ kvm_vcpu_mark_page_dirty(vcpu, gfn);
+}
+
+static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long shmem_phys_lo = cp->a0;
+ unsigned long shmem_phys_hi = cp->a1;
+ u32 flags = cp->a2;
+ struct sbi_sta_struct zero_sta = {0};
+ unsigned long hva;
+ bool writable;
+ gpa_t shmem;
+ int ret;
+
+ if (flags != 0)
+ return SBI_ERR_INVALID_PARAM;
+
+ if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
+ shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
+ vcpu->arch.sta.shmem = INVALID_GPA;
+ return 0;
+ }
+
+ if (shmem_phys_lo & (SZ_64 - 1))
+ return SBI_ERR_INVALID_PARAM;
+
+ shmem = shmem_phys_lo;
+
+ if (shmem_phys_hi != 0) {
+ if (IS_ENABLED(CONFIG_32BIT))
+ shmem |= ((gpa_t)shmem_phys_hi << 32);
+ else
+ return SBI_ERR_INVALID_ADDRESS;
+ }
+
+ hva = kvm_vcpu_gfn_to_hva_prot(vcpu, shmem >> PAGE_SHIFT, &writable);
+ if (kvm_is_error_hva(hva) || !writable)
+ return SBI_ERR_INVALID_ADDRESS;
+
+ ret = kvm_vcpu_write_guest(vcpu, shmem, &zero_sta, sizeof(zero_sta));
+ if (ret)
+ return SBI_ERR_FAILURE;
+
+ vcpu->arch.sta.shmem = shmem;
+ vcpu->arch.sta.last_steal = current->sched_info.run_delay;
+
+ return 0;
+}
+
+static int kvm_sbi_ext_sta_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long funcid = cp->a6;
+ int ret;
+
+ switch (funcid) {
+ case SBI_EXT_STA_STEAL_TIME_SET_SHMEM:
+ ret = kvm_sbi_sta_steal_time_set_shmem(vcpu);
+ break;
+ default:
+ ret = SBI_ERR_NOT_SUPPORTED;
+ break;
+ }
+
+ retdata->err_val = ret;
+
+ return 0;
+}
+
+static unsigned long kvm_sbi_ext_sta_probe(struct kvm_vcpu *vcpu)
+{
+ return !!sched_info_on();
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta = {
+ .extid_start = SBI_EXT_STA,
+ .extid_end = SBI_EXT_STA,
+ .handler = kvm_sbi_ext_sta_handler,
+ .probe = kvm_sbi_ext_sta_probe,
+};
+
+int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long *reg_val)
+{
+ switch (reg_num) {
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_lo):
+ *reg_val = (unsigned long)vcpu->arch.sta.shmem;
+ break;
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
+ if (IS_ENABLED(CONFIG_32BIT))
+ *reg_val = upper_32_bits(vcpu->arch.sta.shmem);
+ else
+ *reg_val = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg_sbi_sta(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long reg_val)
+{
+ switch (reg_num) {
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_lo):
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem);
+
+ vcpu->arch.sta.shmem = reg_val;
+ vcpu->arch.sta.shmem |= hi << 32;
+ } else {
+ vcpu->arch.sta.shmem = reg_val;
+ }
+ break;
+ case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
+ if (IS_ENABLED(CONFIG_32BIT)) {
+ gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem);
+
+ vcpu->arch.sta.shmem = ((gpa_t)reg_val << 32);
+ vcpu->arch.sta.shmem |= lo;
+ } else if (reg_val != 0) {
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
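
kvm_riscv_vcpu_record_steal_time() above bumps 'sequence' to an odd value before updating 'steal' and back to an even value afterwards, so a guest reader has to retry around that window. A hedged guest-side sketch; sta_read_steal() is hypothetical, and real code would also need read barriers between the loads:

static u64 sta_read_steal(struct sbi_sta_struct *sta)
{
	u32 seq;
	u64 steal;

	do {
		do {
			seq = le32_to_cpu(READ_ONCE(sta->sequence));
		} while (seq & 1);	/* host update in progress */

		steal = le64_to_cpu(READ_ONCE(sta->steal));
	} while (le32_to_cpu(READ_ONCE(sta->sequence)) != seq);

	return steal;
}
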
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
index d74df8eb4d71..0c26189aa01c 100644
--- a/arch/riscv/kvm/vcpu_switch.S
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -15,7 +15,7 @@
.altmacro
.option norelax
-ENTRY(__kvm_riscv_switch_to)
+SYM_FUNC_START(__kvm_riscv_switch_to)
/* Save Host GPRs (except A0 and T0-T6) */
REG_S ra, (KVM_ARCH_HOST_RA)(a0)
REG_S sp, (KVM_ARCH_HOST_SP)(a0)
@@ -45,7 +45,7 @@ ENTRY(__kvm_riscv_switch_to)
REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
- la t4, __kvm_switch_return
+ la t4, .Lkvm_switch_return
REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0)
/* Save Host and Restore Guest SSTATUS */
@@ -113,7 +113,7 @@ ENTRY(__kvm_riscv_switch_to)
/* Back to Host */
.align 2
-__kvm_switch_return:
+.Lkvm_switch_return:
/* Swap Guest A0 with SSCRATCH */
csrrw a0, CSR_SSCRATCH, a0
@@ -208,9 +208,9 @@ __kvm_switch_return:
/* Return to C code */
ret
-ENDPROC(__kvm_riscv_switch_to)
+SYM_FUNC_END(__kvm_riscv_switch_to)
-ENTRY(__kvm_riscv_unpriv_trap)
+SYM_CODE_START(__kvm_riscv_unpriv_trap)
/*
 * We assume that the faulting unpriv load/store instruction is
 * 4 bytes long and blindly increment SEPC by 4.
@@ -231,12 +231,10 @@ ENTRY(__kvm_riscv_unpriv_trap)
csrr a1, CSR_HTINST
REG_S a1, (KVM_ARCH_TRAP_HTINST)(a0)
sret
-ENDPROC(__kvm_riscv_unpriv_trap)
+SYM_CODE_END(__kvm_riscv_unpriv_trap)
#ifdef CONFIG_FPU
- .align 3
- .global __kvm_riscv_fp_f_save
-__kvm_riscv_fp_f_save:
+SYM_FUNC_START(__kvm_riscv_fp_f_save)
csrr t2, CSR_SSTATUS
li t1, SR_FS
csrs CSR_SSTATUS, t1
@@ -276,10 +274,9 @@ __kvm_riscv_fp_f_save:
sw t0, KVM_ARCH_FP_F_FCSR(a0)
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_f_save)
- .align 3
- .global __kvm_riscv_fp_d_save
-__kvm_riscv_fp_d_save:
+SYM_FUNC_START(__kvm_riscv_fp_d_save)
csrr t2, CSR_SSTATUS
li t1, SR_FS
csrs CSR_SSTATUS, t1
@@ -319,10 +316,9 @@ __kvm_riscv_fp_d_save:
sw t0, KVM_ARCH_FP_D_FCSR(a0)
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_d_save)
- .align 3
- .global __kvm_riscv_fp_f_restore
-__kvm_riscv_fp_f_restore:
+SYM_FUNC_START(__kvm_riscv_fp_f_restore)
csrr t2, CSR_SSTATUS
li t1, SR_FS
lw t0, KVM_ARCH_FP_F_FCSR(a0)
@@ -362,10 +358,9 @@ __kvm_riscv_fp_f_restore:
fscsr t0
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_f_restore)
- .align 3
- .global __kvm_riscv_fp_d_restore
-__kvm_riscv_fp_d_restore:
+SYM_FUNC_START(__kvm_riscv_fp_d_restore)
csrr t2, CSR_SSTATUS
li t1, SR_FS
lw t0, KVM_ARCH_FP_D_FCSR(a0)
@@ -405,4 +400,5 @@ __kvm_riscv_fp_d_restore:
fscsr t0
csrw CSR_SSTATUS, t2
ret
+SYM_FUNC_END(__kvm_riscv_fp_d_restore)
#endif
diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c
index b339a2682f25..d92d1348045c 100644
--- a/arch/riscv/kvm/vcpu_vector.c
+++ b/arch/riscv/kvm/vcpu_vector.c
@@ -76,6 +76,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL);
if (!cntx->vector.datap)
return -ENOMEM;
+ cntx->vector.vlenb = riscv_v_vsize / 32;
vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
if (!vcpu->arch.host_context.vector.datap)
@@ -115,6 +116,9 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
*reg_addr = &cntx->vector.vcsr;
break;
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ *reg_addr = &cntx->vector.vlenb;
+ break;
case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
default:
return -ENOENT;
@@ -173,6 +177,18 @@ int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
if (!riscv_isa_extension_available(isa, v))
return -ENOENT;
+ if (reg_num == KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)) {
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long reg_val;
+
+ if (copy_from_user(&reg_val, uaddr, reg_size))
+ return -EFAULT;
+ if (reg_val != cntx->vector.vlenb)
+ return -EINVAL;
+
+ return 0;
+ }
+
rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
if (rc)
return rc;
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 7e2b50c692c1..ce58bc48e5b8 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -179,7 +179,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = kvm_riscv_aia_available();
break;
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 26cb2502ecf8..bd6e6c1b0497 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -6,8 +6,14 @@ lib-y += memmove.o
lib-y += strcmp.o
lib-y += strlen.o
lib-y += strncmp.o
+lib-y += csum.o
+ifeq ($(CONFIG_MMU), y)
+lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
+endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RISCV_ISA_V) += xor.o
+lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S
index b22de1231144..20ff03f5b0f2 100644
--- a/arch/riscv/lib/clear_page.S
+++ b/arch/riscv/lib/clear_page.S
@@ -4,9 +4,9 @@
*/
#include <linux/linkage.h>
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/alternative-macros.h>
-#include <asm-generic/export.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/page.h>
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
new file mode 100644
index 000000000000..af3df5274ccb
--- /dev/null
+++ b/arch/riscv/lib/csum.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Checksum library
+ *
+ * Influenced by arch/arm64/lib/csum.c
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/jump_label.h>
+#include <linux/kasan-checks.h>
+#include <linux/kernel.h>
+
+#include <asm/cpufeature.h>
+
+#include <net/checksum.h>
+
+/* Default version is sufficient for 32 bit */
+#ifndef CONFIG_32BIT
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ unsigned int ulen, uproto;
+ unsigned long sum = (__force unsigned long)csum;
+
+ sum += (__force unsigned long)saddr->s6_addr32[0];
+ sum += (__force unsigned long)saddr->s6_addr32[1];
+ sum += (__force unsigned long)saddr->s6_addr32[2];
+ sum += (__force unsigned long)saddr->s6_addr32[3];
+
+ sum += (__force unsigned long)daddr->s6_addr32[0];
+ sum += (__force unsigned long)daddr->s6_addr32[1];
+ sum += (__force unsigned long)daddr->s6_addr32[2];
+ sum += (__force unsigned long)daddr->s6_addr32[3];
+
+ ulen = (__force unsigned int)htonl((unsigned int)len);
+ sum += ulen;
+
+ uproto = (__force unsigned int)htonl(proto);
+ sum += uproto;
+
+ /*
+	 * Zbb support saves 4 instructions, so it is not worth checking for
+	 * it at runtime without the alternatives framework.
+ */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+ IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ unsigned long fold_temp;
+
+ /*
+ * Zbb is likely available when the kernel is compiled with Zbb
+ * support, so nop when Zbb is available and jump when Zbb is
+ * not available.
+ */
+ asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ RISCV_ISA_EXT_ZBB, 1)
+ :
+ :
+ :
+ : no_zbb);
+ asm(".option push \n\
+ .option arch,+zbb \n\
+ rori %[fold_temp], %[sum], 32 \n\
+ add %[sum], %[fold_temp], %[sum] \n\
+ srli %[sum], %[sum], 32 \n\
+ not %[fold_temp], %[sum] \n\
+ roriw %[sum], %[sum], 16 \n\
+ subw %[sum], %[fold_temp], %[sum] \n\
+ .option pop"
+ : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
+ return (__force __sum16)(sum >> 16);
+ }
+no_zbb:
+ sum += ror64(sum, 32);
+ sum >>= 32;
+ return csum_fold((__force __wsum)sum);
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
+#endif /* !CONFIG_32BIT */
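
As a rough illustration of the generic (non-Zbb) fold path above, a stand-alone sketch that reduces a 64-bit running sum to a folded 16-bit checksum; the input value is arbitrary and csum_fold() is open-coded:

#include <stdint.h>
#include <stdio.h>

static uint64_t ror64(uint64_t v, unsigned int s)
{
        return (v >> s) | (v << (64 - s));      /* s must be 1..63 */
}

int main(void)
{
        uint64_t sum = 0x1234567890abcdefULL;   /* arbitrary running sum */

        sum += ror64(sum, 32);                  /* fold 64 bits to 32, keeping carries */
        sum >>= 32;
        sum = (sum & 0xffff) + (sum >> 16);     /* fold 32 bits to 16 ... */
        sum = (sum & 0xffff) + (sum >> 16);     /* ... absorbing the carry */
        printf("csum: 0x%04x\n", (unsigned int)(~sum & 0xffff));
        return 0;
}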
+
+#ifdef CONFIG_32BIT
+#define OFFSET_MASK 3
+#elif CONFIG_64BIT
+#define OFFSET_MASK 7
+#endif
+
+static inline __no_sanitize_address unsigned long
+do_csum_common(const unsigned long *ptr, const unsigned long *end,
+ unsigned long data)
+{
+ unsigned int shift;
+ unsigned long csum = 0, carry = 0;
+
+ /*
+ * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
+ * faster than doing 32-bit reads on architectures that support larger
+ * reads.
+ */
+ while (ptr < end) {
+ csum += data;
+ carry += csum < data;
+ data = *(ptr++);
+ }
+
+ /*
+	 * Clear the bytes that were over-read on the tail if any bytes are
+	 * left over.
+ */
+ shift = ((long)ptr - (long)end) * 8;
+#ifdef __LITTLE_ENDIAN
+ data = (data << shift) >> shift;
+#else
+ data = (data >> shift) << shift;
+#endif
+ csum += data;
+ carry += csum < data;
+ csum += carry;
+ csum += csum < carry;
+
+ return csum;
+}
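
A minimal sketch (illustrative, not part of the patch) of the carry handling in the loop above: a ones'-complement add folds the carry back into the sum instead of discarding it.

#include <assert.h>

/* add with end-around carry, as the csum/carry pair is maintained above */
static unsigned long csum_add(unsigned long csum, unsigned long data)
{
        csum += data;
        return csum + (csum < data);
}

int main(void)
{
        unsigned long s = 0;

        s = csum_add(s, ~0UL);  /* all ones */
        s = csum_add(s, 1UL);   /* overflow wraps around to 1, not 0 */
        assert(s == 1UL);
        return 0;
}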
+
+/*
+ * The algorithm accounts for buff being misaligned.
+ * If buff is not aligned, the routine over-reads bytes but does not use the
+ * bytes that it shouldn't. The same applies to the tail end of the read.
+ */
+static inline __no_sanitize_address unsigned int
+do_csum_with_alignment(const unsigned char *buff, int len)
+{
+ unsigned int offset, shift;
+ unsigned long csum, data;
+ const unsigned long *ptr, *end;
+
+ /*
+ * Align address to closest word (double word on rv64) that comes before
+ * buff. This should always be in the same page and cache line.
+ * Directly call KASAN with the alignment we will be using.
+ */
+ offset = (unsigned long)buff & OFFSET_MASK;
+ kasan_check_read(buff, len);
+ ptr = (const unsigned long *)(buff - offset);
+
+ /*
+ * Clear the most significant bytes that were over-read if buff was not
+ * aligned.
+ */
+ shift = offset * 8;
+ data = *(ptr++);
+#ifdef __LITTLE_ENDIAN
+ data = (data >> shift) << shift;
+#else
+ data = (data << shift) >> shift;
+#endif
+ end = (const unsigned long *)(buff + len);
+ csum = do_csum_common(ptr, end, data);
+
+#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
+ /*
+	 * Zbb support saves 6 instructions, so it is not worth checking for
+	 * it at runtime without the alternatives framework.
+ */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+ IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ unsigned long fold_temp;
+
+ /*
+ * Zbb is likely available when the kernel is compiled with Zbb
+ * support, so nop when Zbb is available and jump when Zbb is
+ * not available.
+ */
+ asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ RISCV_ISA_EXT_ZBB, 1)
+ :
+ :
+ :
+ : no_zbb);
+
+#ifdef CONFIG_32BIT
+ asm_volatile_goto(".option push \n\
+ .option arch,+zbb \n\
+ rori %[fold_temp], %[csum], 16 \n\
+ andi %[offset], %[offset], 1 \n\
+ add %[csum], %[fold_temp], %[csum] \n\
+ beq %[offset], zero, %l[end] \n\
+ rev8 %[csum], %[csum] \n\
+ .option pop"
+ : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+ : [offset] "r" (offset)
+ :
+ : end);
+
+ return (unsigned short)csum;
+#else /* !CONFIG_32BIT */
+ asm_volatile_goto(".option push \n\
+ .option arch,+zbb \n\
+ rori %[fold_temp], %[csum], 32 \n\
+ add %[csum], %[fold_temp], %[csum] \n\
+ srli %[csum], %[csum], 32 \n\
+ roriw %[fold_temp], %[csum], 16 \n\
+ addw %[csum], %[fold_temp], %[csum] \n\
+ andi %[offset], %[offset], 1 \n\
+ beq %[offset], zero, %l[end] \n\
+ rev8 %[csum], %[csum] \n\
+ .option pop"
+ : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+ : [offset] "r" (offset)
+ :
+ : end);
+
+ return (csum << 16) >> 48;
+#endif /* !CONFIG_32BIT */
+end:
+ return csum >> 16;
+ }
+no_zbb:
+#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
+#ifndef CONFIG_32BIT
+ csum += ror64(csum, 32);
+ csum >>= 32;
+#endif
+ csum = (u32)csum + ror32((u32)csum, 16);
+ if (offset & 1)
+ return (u16)swab32(csum);
+ return csum >> 16;
+}
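
The head-masking step above can be hard to visualise; a little-endian sketch with made-up byte values (assuming 64-bit words, not taken from the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        /* buff starts 3 bytes into an 8-byte word: bytes 0x11..0x33 were
         * over-read and must not contribute to the checksum */
        uint64_t word = 0x8877665544332211ULL;  /* bytes 0x11..0x88 in memory */
        unsigned int offset = 3, shift = offset * 8;
        uint64_t data = (word >> shift) << shift;

        assert(data == 0x8877665544000000ULL);
        return 0;
}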
+
+/*
+ * Does not perform alignment, should only be used if machine has fast
+ * misaligned accesses, or when buff is known to be aligned.
+ */
+static inline __no_sanitize_address unsigned int
+do_csum_no_alignment(const unsigned char *buff, int len)
+{
+ unsigned long csum, data;
+ const unsigned long *ptr, *end;
+
+ ptr = (const unsigned long *)(buff);
+ data = *(ptr++);
+
+ kasan_check_read(buff, len);
+
+ end = (const unsigned long *)(buff + len);
+ csum = do_csum_common(ptr, end, data);
+
+ /*
+	 * Zbb support saves 6 instructions, so it is not worth checking for
+	 * it at runtime without the alternatives framework.
+ */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+ IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+ unsigned long fold_temp;
+
+ /*
+ * Zbb is likely available when the kernel is compiled with Zbb
+ * support, so nop when Zbb is available and jump when Zbb is
+ * not available.
+ */
+ asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+ RISCV_ISA_EXT_ZBB, 1)
+ :
+ :
+ :
+ : no_zbb);
+
+#ifdef CONFIG_32BIT
+ asm (".option push \n\
+ .option arch,+zbb \n\
+ rori %[fold_temp], %[csum], 16 \n\
+ add %[csum], %[fold_temp], %[csum] \n\
+ .option pop"
+ : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+ :
+ : );
+
+#else /* !CONFIG_32BIT */
+ asm (".option push \n\
+ .option arch,+zbb \n\
+ rori %[fold_temp], %[csum], 32 \n\
+ add %[csum], %[fold_temp], %[csum] \n\
+ srli %[csum], %[csum], 32 \n\
+ roriw %[fold_temp], %[csum], 16 \n\
+ addw %[csum], %[fold_temp], %[csum] \n\
+ .option pop"
+ : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+ :
+ : );
+#endif /* !CONFIG_32BIT */
+ return csum >> 16;
+ }
+no_zbb:
+#ifndef CONFIG_32BIT
+ csum += ror64(csum, 32);
+ csum >>= 32;
+#endif
+ csum = (u32)csum + ror32((u32)csum, 16);
+ return csum >> 16;
+}
+
+/*
+ * Perform a checksum on an arbitrary memory address.
+ * Will do a light-weight address alignment if buff is misaligned, unless
+ * the CPU supports fast misaligned accesses.
+ */
+unsigned int do_csum(const unsigned char *buff, int len)
+{
+ if (unlikely(len <= 0))
+ return 0;
+
+ /*
+ * Significant performance gains can be seen by not doing alignment
+ * on machines with fast misaligned accesses.
+ *
+ * There is some duplicate code between the "with_alignment" and
+ * "no_alignment" implmentations, but the overlap is too awkward to be
+ * able to fit in one function without introducing multiple static
+ * branches. The largest chunk of overlap was delegated into the
+ * do_csum_common function.
+ */
+ if (static_branch_likely(&fast_misaligned_access_speed_key))
+ return do_csum_no_alignment(buff, len);
+
+ if (((unsigned long)buff & OFFSET_MASK) == 0)
+ return do_csum_no_alignment(buff, len);
+
+ return do_csum_with_alignment(buff, len);
+}
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
new file mode 100644
index 000000000000..be38a93cedae
--- /dev/null
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ * Author: Andy Chiu <andy.chiu@sifive.com>
+ */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#include <asm/vector.h>
+#include <asm/simd.h>
+
+#ifdef CONFIG_MMU
+#include <asm/asm-prototypes.h>
+#endif
+
+#ifdef CONFIG_MMU
+size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
+int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int fallback_scalar_usercopy(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+{
+ size_t remain, copied;
+
+ /* skip has_vector() check because it has been done by the asm */
+ if (!may_use_simd())
+ goto fallback;
+
+ kernel_vector_begin();
+ remain = __asm_vector_usercopy(dst, src, n);
+ kernel_vector_end();
+
+ if (remain) {
+ copied = n - remain;
+ dst += copied;
+ src += copied;
+ n = remain;
+ goto fallback;
+ }
+
+ return remain;
+
+fallback:
+ return fallback_scalar_usercopy(dst, src, n);
+}
+#endif
diff --git a/arch/riscv/lib/tishift.S b/arch/riscv/lib/tishift.S
index ef90075c4b0a..c8294bf72c06 100644
--- a/arch/riscv/lib/tishift.S
+++ b/arch/riscv/lib/tishift.S
@@ -4,7 +4,7 @@
*/
#include <linux/linkage.h>
-#include <asm-generic/export.h>
+#include <linux/export.h>
SYM_FUNC_START(__lshrti3)
beqz a2, .L1
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 3ab438f30d13..bc22c078aba8 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -1,8 +1,10 @@
#include <linux/linkage.h>
-#include <asm-generic/export.h>
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-extable.h>
#include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
.macro fixup op reg addr lbl
100:
@@ -11,6 +13,13 @@
.endm
SYM_FUNC_START(__asm_copy_to_user)
+#ifdef CONFIG_RISCV_ISA_V
+ ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
+ REG_L t0, riscv_v_usercopy_threshold
+ bltu a2, t0, fallback_scalar_usercopy
+ tail enter_vector_usercopy
+#endif
+SYM_FUNC_START(fallback_scalar_usercopy)
/* Enable access to user memory */
li t6, SR_SUM
@@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user)
sub a0, t5, a0
ret
SYM_FUNC_END(__asm_copy_to_user)
+SYM_FUNC_END(fallback_scalar_usercopy)
EXPORT_SYMBOL(__asm_copy_to_user)
SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
EXPORT_SYMBOL(__asm_copy_from_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
new file mode 100644
index 000000000000..51ab5588e9ff
--- /dev/null
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+#include <asm/asm-extable.h>
+#include <asm/csr.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+ .macro fixup op reg addr lbl
+100:
+ \op \reg, \addr
+ _asm_extable 100b, \lbl
+ .endm
+
+SYM_FUNC_START(__asm_vector_usercopy)
+ /* Enable access to user memory */
+ li t6, SR_SUM
+ csrs CSR_STATUS, t6
+
+loop:
+ vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+ fixup vle8.v vData, (pSrc), 10f
+ sub iNum, iNum, iVL
+ add pSrc, pSrc, iVL
+ fixup vse8.v vData, (pDst), 11f
+ add pDst, pDst, iVL
+ bnez iNum, loop
+
+ /* Exception fixup for vector load is shared with normal exit */
+10:
+ /* Disable access to user memory */
+ csrc CSR_STATUS, t6
+ mv a0, iNum
+ ret
+
+ /* Exception fixup code for vector store. */
+11:
+ /* Undo the subtraction after vle8.v */
+ add iNum, iNum, iVL
+	/* Make sure the scalar fallback skips the already processed bytes */
+ csrr t2, CSR_VSTART
+ sub iNum, iNum, t2
+ j 10b
+SYM_FUNC_END(__asm_vector_usercopy)
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..b28f2430e52f
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <linux/export.h>
+#include <asm/asm.h>
+
+SYM_FUNC_START(xor_regs_2_)
+ vsetvli a3, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a3
+ vxor.vv v16, v0, v8
+ add a2, a2, a3
+ vse8.v v16, (a1)
+ add a1, a1, a3
+ bnez a0, xor_regs_2_
+ ret
+SYM_FUNC_END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+SYM_FUNC_START(xor_regs_3_)
+ vsetvli a4, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a4
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a4
+ vxor.vv v16, v0, v16
+ add a3, a3, a4
+ vse8.v v16, (a1)
+ add a1, a1, a4
+ bnez a0, xor_regs_3_
+ ret
+SYM_FUNC_END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+SYM_FUNC_START(xor_regs_4_)
+ vsetvli a5, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a5
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a5
+ vxor.vv v0, v0, v16
+ vle8.v v24, (a4)
+ add a3, a3, a5
+ vxor.vv v16, v0, v24
+ add a4, a4, a5
+ vse8.v v16, (a1)
+ add a1, a1, a5
+ bnez a0, xor_regs_4_
+ ret
+SYM_FUNC_END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+SYM_FUNC_START(xor_regs_5_)
+ vsetvli a6, a0, e8, m8, ta, ma
+ vle8.v v0, (a1)
+ vle8.v v8, (a2)
+ sub a0, a0, a6
+ vxor.vv v0, v0, v8
+ vle8.v v16, (a3)
+ add a2, a2, a6
+ vxor.vv v0, v0, v16
+ vle8.v v24, (a4)
+ add a3, a3, a6
+ vxor.vv v0, v0, v24
+ vle8.v v8, (a5)
+ add a4, a4, a6
+ vxor.vv v16, v0, v8
+ add a5, a5, a6
+ vse8.v v16, (a1)
+ add a1, a1, a6
+ bnez a0, xor_regs_5_
+ ret
+SYM_FUNC_END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 3a4dfc8babcf..2c869f8026a8 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -13,10 +13,9 @@ endif
KCOV_INSTRUMENT_init.o := n
obj-y += init.o
-obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o
+obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o
obj-y += cacheflush.o
obj-y += context.o
-obj-y += pgtable.o
obj-y += pmem.o
ifeq ($(CONFIG_MMU),y)
diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index 4e4e469b8dd6..843107f834b2 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -129,7 +129,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
- const struct iommu_ops *iommu, bool coherent)
+ bool coherent)
{
WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN,
TAINT_CPU_OUT_OF_SPEC,
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 35484d830fd6..dd1530af3ef1 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
return true;
}
+static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
+{
+ if (unlikely(!offset || offset > MAX_REG_OFFSET))
+ return 0;
+
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
+
static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
unsigned long val)
{
@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
return true;
}
+static bool
+ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+ int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+ unsigned long data, addr, offset;
+
+ addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
+
+ offset = addr & 0x7UL;
+ addr &= ~0x7UL;
+
+ data = *(unsigned long *)addr >> (offset * 8);
+
+ regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
+
+ regs->epc = get_ex_fixup(ex);
+ return true;
+}
+
bool fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *ex;
@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
return ex_handler_bpf(ex, regs);
case EX_TYPE_UACCESS_ERR_ZERO:
return ex_handler_uaccess_err_zero(ex, regs);
+ case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
+ return ex_handler_load_unaligned_zeropad(ex, regs);
}
BUG();
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 081339ddf47e..3ba1d4dde5dd 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -136,24 +136,24 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
pgd = (pgd_t *)pfn_to_virt(pfn) + index;
pgd_k = init_mm.pgd + index;
- if (!pgd_present(*pgd_k)) {
+ if (!pgd_present(pgdp_get(pgd_k))) {
no_context(regs, addr);
return;
}
- set_pgd(pgd, *pgd_k);
+ set_pgd(pgd, pgdp_get(pgd_k));
p4d_k = p4d_offset(pgd_k, addr);
- if (!p4d_present(*p4d_k)) {
+ if (!p4d_present(p4dp_get(p4d_k))) {
no_context(regs, addr);
return;
}
pud_k = pud_offset(p4d_k, addr);
- if (!pud_present(*pud_k)) {
+ if (!pud_present(pudp_get(pud_k))) {
no_context(regs, addr);
return;
}
- if (pud_leaf(*pud_k))
+ if (pud_leaf(pudp_get(pud_k)))
goto flush_tlb;
/*
@@ -161,11 +161,11 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* to copy individual PTEs
*/
pmd_k = pmd_offset(pud_k, addr);
- if (!pmd_present(*pmd_k)) {
+ if (!pmd_present(pmdp_get(pmd_k))) {
no_context(regs, addr);
return;
}
- if (pmd_leaf(*pmd_k))
+ if (pmd_leaf(pmdp_get(pmd_k)))
goto flush_tlb;
/*
@@ -175,7 +175,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* silently loop forever.
*/
pte_k = pte_offset_kernel(pmd_k, addr);
- if (!pte_present(*pte_k)) {
+ if (!pte_present(ptep_get(pte_k))) {
no_context(regs, addr);
return;
}
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index b52f0210481f..431596c0e20e 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -54,7 +54,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
}
if (sz == PMD_SIZE) {
- if (want_pmd_share(vma, addr) && pud_none(*pud))
+ if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud)))
pte = huge_pmd_share(mm, vma, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
@@ -93,11 +93,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
- if (!pgd_present(*pgd))
+ if (!pgd_present(pgdp_get(pgd)))
return NULL;
p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d))
+ if (!p4d_present(p4dp_get(p4d)))
return NULL;
pud = pud_offset(p4d, addr);
@@ -105,7 +105,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
/* must be pud huge, non-present or none */
return (pte_t *)pud;
- if (!pud_present(*pud))
+ if (!pud_present(pudp_get(pud)))
return NULL;
pmd = pmd_offset(pud, addr);
@@ -113,7 +113,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
/* must be pmd huge, non-present or none */
return (pte_t *)pmd;
- if (!pmd_present(*pmd))
+ if (!pmd_present(pmdp_get(pmd)))
return NULL;
for_each_napot_order(order) {
@@ -293,7 +293,7 @@ void huge_pte_clear(struct mm_struct *mm,
pte_t *ptep,
unsigned long sz)
{
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = ptep_get(ptep);
int i, pte_num;
if (!pte_napot(pte)) {
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 2e011cbddf3a..32cad6a65ccd 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -174,6 +174,9 @@ void __init mem_init(void)
/* Limit the memory size via mem. */
static phys_addr_t memory_limit;
+#ifdef CONFIG_XIP_KERNEL
+#define memory_limit (*(phys_addr_t *)XIP_FIXUP(&memory_limit))
+#endif /* CONFIG_XIP_KERNEL */
static int __init early_mem(char *p)
{
@@ -952,7 +955,7 @@ static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
* setup_vm_final installs the linear mapping. For 32-bit kernel, as the
* kernel is mapped in the linear mapping, that makes no difference.
*/
- dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
+ dtb_early_va = kernel_mapping_pa_to_va(dtb_pa);
#endif
dtb_early_pa = dtb_pa;
@@ -1055,9 +1058,13 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
#endif
kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
- kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
#ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_64BIT
+ kernel_map.page_offset = PAGE_OFFSET_L3;
+#else
+ kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
+#endif
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
@@ -1067,6 +1074,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom;
#else
+ kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
kernel_map.phys_addr = (uintptr_t)(&_start);
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif
@@ -1383,10 +1391,29 @@ void __init misc_mem_init(void)
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+ pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+ unsigned long addr, unsigned long next)
+{
+ vmemmap_verify((pte_t *)pmdp, node, addr, next);
+ return 1;
+}
+
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
- return vmemmap_populate_basepages(start, end, node, NULL);
+ /*
+ * Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we
+ * can't use hugepage mappings for 2-level page table because in case of
+ * memory hotplug, we are not able to update all the page tables with
+ * the new PMDs.
+ */
+ return vmemmap_populate_hugepages(start, end, node, NULL);
}
#endif
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 5e39dcf23fdb..c301c8d291d2 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -31,7 +31,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
phys_addr_t phys_addr;
pte_t *ptep, *p;
- if (pmd_none(*pmd)) {
+ if (pmd_none(pmdp_get(pmd))) {
p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -39,7 +39,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
ptep = pte_offset_kernel(pmd, vaddr);
do {
- if (pte_none(*ptep)) {
+ if (pte_none(ptep_get(ptep))) {
phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
memset(__va(phys_addr), KASAN_SHADOW_INIT, PAGE_SIZE);
@@ -53,7 +53,7 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned
pmd_t *pmdp, *p;
unsigned long next;
- if (pud_none(*pud)) {
+ if (pud_none(pudp_get(pud))) {
p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -63,7 +63,8 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned
do {
next = pmd_addr_end(vaddr, end);
- if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) {
+ if (pmd_none(pmdp_get(pmdp)) && IS_ALIGNED(vaddr, PMD_SIZE) &&
+ (next - vaddr) >= PMD_SIZE) {
phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
if (phys_addr) {
set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
@@ -83,7 +84,7 @@ static void __init kasan_populate_pud(p4d_t *p4d,
pud_t *pudp, *p;
unsigned long next;
- if (p4d_none(*p4d)) {
+ if (p4d_none(p4dp_get(p4d))) {
p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -93,7 +94,8 @@ static void __init kasan_populate_pud(p4d_t *p4d,
do {
next = pud_addr_end(vaddr, end);
- if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+ if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) &&
+ (next - vaddr) >= PUD_SIZE) {
phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
if (phys_addr) {
set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
@@ -113,7 +115,7 @@ static void __init kasan_populate_p4d(pgd_t *pgd,
p4d_t *p4dp, *p;
unsigned long next;
- if (pgd_none(*pgd)) {
+ if (pgd_none(pgdp_get(pgd))) {
p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE);
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
@@ -123,7 +125,8 @@ static void __init kasan_populate_p4d(pgd_t *pgd,
do {
next = p4d_addr_end(vaddr, end);
- if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
+ if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) &&
+ (next - vaddr) >= P4D_SIZE) {
phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
if (phys_addr) {
set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
@@ -145,7 +148,7 @@ static void __init kasan_populate_pgd(pgd_t *pgdp,
do {
next = pgd_addr_end(vaddr, end);
- if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
+ if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
(next - vaddr) >= PGDIR_SIZE) {
phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
if (phys_addr) {
@@ -168,7 +171,7 @@ static void __init kasan_early_clear_pud(p4d_t *p4dp,
if (!pgtable_l4_enabled) {
pudp = (pud_t *)p4dp;
} else {
- base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp))));
pudp = base_pud + pud_index(vaddr);
}
@@ -193,7 +196,7 @@ static void __init kasan_early_clear_p4d(pgd_t *pgdp,
if (!pgtable_l5_enabled) {
p4dp = (p4d_t *)pgdp;
} else {
- base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp))));
p4dp = base_p4d + p4d_index(vaddr);
}
@@ -239,14 +242,14 @@ static void __init kasan_early_populate_pud(p4d_t *p4dp,
if (!pgtable_l4_enabled) {
pudp = (pud_t *)p4dp;
} else {
- base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp))));
pudp = base_pud + pud_index(vaddr);
}
do {
next = pud_addr_end(vaddr, end);
- if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) &&
+ if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) &&
(next - vaddr) >= PUD_SIZE) {
phys_addr = __pa((uintptr_t)kasan_early_shadow_pmd);
set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
@@ -277,14 +280,14 @@ static void __init kasan_early_populate_p4d(pgd_t *pgdp,
if (!pgtable_l5_enabled) {
p4dp = (p4d_t *)pgdp;
} else {
- base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp))));
p4dp = base_p4d + p4d_index(vaddr);
}
do {
next = p4d_addr_end(vaddr, end);
- if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) &&
+ if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) &&
(next - vaddr) >= P4D_SIZE) {
phys_addr = __pa((uintptr_t)kasan_early_shadow_pud);
set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
@@ -305,7 +308,7 @@ static void __init kasan_early_populate_pgd(pgd_t *pgdp,
do {
next = pgd_addr_end(vaddr, end);
- if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
+ if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
(next - vaddr) >= PGDIR_SIZE) {
phys_addr = __pa((uintptr_t)kasan_early_shadow_p4d);
set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
@@ -381,7 +384,7 @@ static void __init kasan_shallow_populate_pud(p4d_t *p4d,
do {
next = pud_addr_end(vaddr, end);
- if (pud_none(*pud_k)) {
+ if (pud_none(pudp_get(pud_k))) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
@@ -401,7 +404,7 @@ static void __init kasan_shallow_populate_p4d(pgd_t *pgd,
do {
next = p4d_addr_end(vaddr, end);
- if (p4d_none(*p4d_k)) {
+ if (p4d_none(p4dp_get(p4d_k))) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
@@ -420,7 +423,7 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long
do {
next = pgd_addr_end(vaddr, end);
- if (pgd_none(*pgd_k)) {
+ if (pgd_none(pgdp_get(pgd_k))) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
@@ -438,6 +441,14 @@ static void __init kasan_shallow_populate(void *start, void *end)
kasan_shallow_populate_pgd(vaddr, vend);
}
+#ifdef CONFIG_KASAN_VMALLOC
+void __init kasan_populate_early_vm_area_shadow(void *start, unsigned long size)
+{
+ kasan_populate(kasan_mem_to_shadow(start),
+ kasan_mem_to_shadow(start + size));
+}
+#endif
+
static void __init create_tmp_mapping(void)
{
void *ptr;
@@ -451,7 +462,7 @@ static void __init create_tmp_mapping(void)
/* Copy the last p4d since it is shared with the kernel mapping. */
if (pgtable_l5_enabled) {
- ptr = (p4d_t *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+ ptr = (p4d_t *)pgd_page_vaddr(pgdp_get(pgd_offset_k(KASAN_SHADOW_END)));
memcpy(tmp_p4d, ptr, sizeof(p4d_t) * PTRS_PER_P4D);
set_pgd(&tmp_pg_dir[pgd_index(KASAN_SHADOW_END)],
pfn_pgd(PFN_DOWN(__pa(tmp_p4d)), PAGE_TABLE));
@@ -462,7 +473,7 @@ static void __init create_tmp_mapping(void)
/* Copy the last pud since it is shared with the kernel mapping. */
if (pgtable_l4_enabled) {
- ptr = (pud_t *)p4d_page_vaddr(*(base_p4d + p4d_index(KASAN_SHADOW_END)));
+ ptr = (pud_t *)p4d_page_vaddr(p4dp_get(base_p4d + p4d_index(KASAN_SHADOW_END)));
memcpy(tmp_pud, ptr, sizeof(pud_t) * PTRS_PER_PUD);
set_p4d(&base_p4d[p4d_index(KASAN_SHADOW_END)],
pfn_p4d(PFN_DOWN(__pa(tmp_pud)), PAGE_TABLE));
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index fc5fc4f785c4..410056a50aa9 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -29,7 +29,7 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- p4d_t val = READ_ONCE(*p4d);
+ p4d_t val = p4dp_get(p4d);
if (p4d_leaf(val)) {
val = __p4d(set_pageattr_masks(p4d_val(val), walk));
@@ -42,7 +42,7 @@ static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- pud_t val = READ_ONCE(*pud);
+ pud_t val = pudp_get(pud);
if (pud_leaf(val)) {
val = __pud(set_pageattr_masks(pud_val(val), walk));
@@ -55,7 +55,7 @@ static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- pmd_t val = READ_ONCE(*pmd);
+ pmd_t val = pmdp_get(pmd);
if (pmd_leaf(val)) {
val = __pmd(set_pageattr_masks(pmd_val(val), walk));
@@ -68,7 +68,7 @@ static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- pte_t val = READ_ONCE(*pte);
+ pte_t val = ptep_get(pte);
val = __pte(set_pageattr_masks(pte_val(val), walk));
set_pte(pte, val);
@@ -108,10 +108,10 @@ static int __split_linear_mapping_pmd(pud_t *pudp,
vaddr <= (vaddr & PMD_MASK) && end >= next)
continue;
- if (pmd_leaf(*pmdp)) {
+ if (pmd_leaf(pmdp_get(pmdp))) {
struct page *pte_page;
- unsigned long pfn = _pmd_pfn(*pmdp);
- pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK);
+ unsigned long pfn = _pmd_pfn(pmdp_get(pmdp));
+ pgprot_t prot = __pgprot(pmd_val(pmdp_get(pmdp)) & ~_PAGE_PFN_MASK);
pte_t *ptep_new;
int i;
@@ -148,10 +148,10 @@ static int __split_linear_mapping_pud(p4d_t *p4dp,
vaddr <= (vaddr & PUD_MASK) && end >= next)
continue;
- if (pud_leaf(*pudp)) {
+ if (pud_leaf(pudp_get(pudp))) {
struct page *pmd_page;
- unsigned long pfn = _pud_pfn(*pudp);
- pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK);
+ unsigned long pfn = _pud_pfn(pudp_get(pudp));
+ pgprot_t prot = __pgprot(pud_val(pudp_get(pudp)) & ~_PAGE_PFN_MASK);
pmd_t *pmdp_new;
int i;
@@ -197,10 +197,10 @@ static int __split_linear_mapping_p4d(pgd_t *pgdp,
vaddr <= (vaddr & P4D_MASK) && end >= next)
continue;
- if (p4d_leaf(*p4dp)) {
+ if (p4d_leaf(p4dp_get(p4dp))) {
struct page *pud_page;
- unsigned long pfn = _p4d_pfn(*p4dp);
- pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK);
+ unsigned long pfn = _p4d_pfn(p4dp_get(p4dp));
+ pgprot_t prot = __pgprot(p4d_val(p4dp_get(p4dp)) & ~_PAGE_PFN_MASK);
pud_t *pudp_new;
int i;
@@ -305,8 +305,13 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
goto unlock;
}
} else if (is_kernel_mapping(start) || is_linear_mapping(start)) {
- lm_start = (unsigned long)lm_alias(start);
- lm_end = (unsigned long)lm_alias(end);
+ if (is_kernel_mapping(start)) {
+ lm_start = (unsigned long)lm_alias(start);
+ lm_end = (unsigned long)lm_alias(end);
+ } else {
+ lm_start = start;
+ lm_end = end;
+ }
ret = split_linear_mapping(lm_start, lm_end);
if (ret)
@@ -378,7 +383,7 @@ int set_direct_map_invalid_noflush(struct page *page)
int set_direct_map_default_noflush(struct page *page)
{
return __set_memory((unsigned long)page_address(page), 1,
- PAGE_KERNEL, __pgprot(0));
+ PAGE_KERNEL, __pgprot(_PAGE_EXEC));
}
#ifdef CONFIG_DEBUG_PAGEALLOC
@@ -406,29 +411,29 @@ bool kernel_page_present(struct page *page)
pte_t *pte;
pgd = pgd_offset_k(addr);
- if (!pgd_present(*pgd))
+ if (!pgd_present(pgdp_get(pgd)))
return false;
- if (pgd_leaf(*pgd))
+ if (pgd_leaf(pgdp_get(pgd)))
return true;
p4d = p4d_offset(pgd, addr);
- if (!p4d_present(*p4d))
+ if (!p4d_present(p4dp_get(p4d)))
return false;
- if (p4d_leaf(*p4d))
+ if (p4d_leaf(p4dp_get(p4d)))
return true;
pud = pud_offset(p4d, addr);
- if (!pud_present(*pud))
+ if (!pud_present(pudp_get(pud)))
return false;
- if (pud_leaf(*pud))
+ if (pud_leaf(pudp_get(pud)))
return true;
pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ if (!pmd_present(pmdp_get(pmd)))
return false;
- if (pmd_leaf(*pmd))
+ if (pmd_leaf(pmdp_get(pmd)))
return true;
pte = pte_offset_kernel(pmd, addr);
- return pte_present(*pte);
+ return pte_present(ptep_get(pte));
}
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index fef4e7328e49..ef887efcb679 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -5,6 +5,47 @@
#include <linux/kernel.h>
#include <linux/pgtable.h>
+int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ if (!pte_same(ptep_get(ptep), entry))
+ __set_pte_at(ptep, entry);
+ /*
+ * update_mmu_cache will unconditionally execute, handling both
+ * the case that the PTE changed and the spurious fault case.
+ */
+ return true;
+}
+
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
+{
+ if (!pte_young(ptep_get(ptep)))
+ return 0;
+ return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
+}
+EXPORT_SYMBOL_GPL(ptep_test_and_clear_young);
+
+#ifdef CONFIG_64BIT
+pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return p4d_pgtable(p4dp_get(p4d)) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
+p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+ if (pgtable_l5_enabled)
+ return pgd_pgtable(pgdp_get(pgd)) + p4d_index(address);
+
+ return (p4d_t *)pgd;
+}
+#endif
+
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
@@ -25,7 +66,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
int pud_clear_huge(pud_t *pud)
{
- if (!pud_leaf(READ_ONCE(*pud)))
+ if (!pud_leaf(pudp_get(pud)))
return 0;
pud_clear(pud);
return 1;
@@ -33,7 +74,7 @@ int pud_clear_huge(pud_t *pud)
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
- pmd_t *pmd = pud_pgtable(*pud);
+ pmd_t *pmd = pud_pgtable(pudp_get(pud));
int i;
pud_clear(pud);
@@ -63,7 +104,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
int pmd_clear_huge(pmd_t *pmd)
{
- if (!pmd_leaf(READ_ONCE(*pmd)))
+ if (!pmd_leaf(pmdp_get(pmd)))
return 0;
pmd_clear(pmd);
return 1;
@@ -71,7 +112,7 @@ int pmd_clear_huge(pmd_t *pmd)
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
- pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pte_t *pte = (pte_t *)pmd_page_vaddr(pmdp_get(pmd));
pmd_clear(pmd);
@@ -88,7 +129,7 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_trans_huge(pmdp_get(pmdp)));
/*
* When leaf PTE entries (regular pages) are collapsed into a leaf
* PMD entry (huge page), a valid non-leaf PTE is converted into a
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index e6659d7368b3..8d12b26f5ac3 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -66,6 +66,11 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
local_flush_tlb_range_threshold_asid(start, size, stride, asid);
}
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID);
+}
+
static void __ipi_flush_tlb_all(void *info)
{
local_flush_tlb_all();
@@ -93,29 +98,23 @@ static void __ipi_flush_tlb_range_asid(void *info)
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
}
-static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
- unsigned long size, unsigned long stride)
+static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
+ unsigned long start, unsigned long size,
+ unsigned long stride)
{
struct flush_tlb_range_data ftd;
- const struct cpumask *cmask;
- unsigned long asid = FLUSH_TLB_NO_ASID;
bool broadcast;
- if (mm) {
- unsigned int cpuid;
+ if (cpumask_empty(cmask))
+ return;
- cmask = mm_cpumask(mm);
- if (cpumask_empty(cmask))
- return;
+ if (cmask != cpu_online_mask) {
+ unsigned int cpuid;
cpuid = get_cpu();
/* check if the tlbflush needs to be sent to other CPUs */
broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
-
- if (static_branch_unlikely(&use_asid_allocator))
- asid = atomic_long_read(&mm->context.id) & asid_mask;
} else {
- cmask = cpu_online_mask;
broadcast = true;
}
@@ -135,25 +134,34 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
local_flush_tlb_range_asid(start, size, stride, asid);
}
- if (mm)
+ if (cmask != cpu_online_mask)
put_cpu();
}
+static inline unsigned long get_mm_asid(struct mm_struct *mm)
+{
+ return static_branch_unlikely(&use_asid_allocator) ?
+ atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+}
+
void flush_tlb_mm(struct mm_struct *mm)
{
- __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+ __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
+ 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
}
void flush_tlb_mm_range(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned int page_size)
{
- __flush_tlb_range(mm, start, end - start, page_size);
+ __flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
+ start, end - start, page_size);
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
- __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE);
+ __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+ addr, PAGE_SIZE, PAGE_SIZE);
}
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -185,18 +193,44 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
}
}
- __flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
+ __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+ start, end - start, stride_size);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
+ __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
+ start, end - start, PAGE_SIZE);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
- __flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE);
+ __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+ start, end - start, PMD_SIZE);
}
#endif
+
+bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+ return true;
+}
+
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+}
+
+void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+ flush_tlb_mm(mm);
+}
+
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+ __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
+ FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 58dc64dd94a8..719a97e7edb2 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -795,6 +795,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
void *orig_call = func_addr;
bool save_ret;
u32 insn;
@@ -878,7 +879,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
stack_size = round_up(stack_size, 16);
- if (func_addr) {
+ if (!is_struct_ops) {
/* For the trampoline called from function entry,
* the frame of traced function and the frame of
* trampoline need to be considered.
@@ -998,7 +999,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
- if (func_addr) {
+ if (!is_struct_ops) {
/* trampoline called from function entry */
emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8f39f0424796..fe565f3a3a91 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -216,7 +216,6 @@ config S390
select HAVE_VIRT_CPU_ACCOUNTING_IDLE
select IOMMU_HELPER if PCI
select IOMMU_SUPPORT if PCI
- select KEXEC
select MMU_GATHER_MERGE_VMAS
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
@@ -443,7 +442,7 @@ config COMMAND_LINE_SIZE
line.
config COMPAT
- def_bool y
+ def_bool n
prompt "Kernel support for 31 bit emulation"
select ARCH_WANT_OLD_COMPAT_IPC
select COMPAT_OLD_SIGACTION
@@ -454,7 +453,9 @@ config COMPAT
Select this option if you want to enable your system kernel to
handle system-calls from ELF binaries for 31 bit ESA. This option
(and some other stuff like libraries and such) is needed for
- executing 31 bit applications. It is safe to say "Y".
+ executing 31 bit applications.
+
+ If unsure say N.
config SMP
def_bool y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 85490d9373fc..cae2dd34fbb4 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -40,6 +40,7 @@ CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
+CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
@@ -636,8 +637,9 @@ CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index fb690fbbf54b..42b988873e54 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -38,6 +38,7 @@ CONFIG_SCHED_AUTOGROUP=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_PROFILING=y
+CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
CONFIG_CRASH_DUMP=y
@@ -621,8 +622,9 @@ CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 47028450eee1..30d2a1687665 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -10,7 +10,6 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_CRASH_DUMP=y
CONFIG_MARCH_Z13=y
-# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
# CONFIG_CHSC_SCH is not set
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 94b6919026df..796007125dff 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -111,4 +111,10 @@ static inline void stfle(u64 *stfle_fac_list, int size)
preempt_enable();
}
+/**
+ * stfle_size - Actual size of the facility list as specified by stfle
+ * (number of double words)
+ */
+unsigned int stfle_size(void);
+
#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 67a298b6cf6e..52664105a473 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -818,7 +818,7 @@ struct s390_io_adapter {
struct kvm_s390_cpu_model {
/* facility mask supported by kvm & hosting machine */
- __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+ __u64 fac_mask[S390_ARCH_FAC_MASK_SIZE_U64];
struct kvm_s390_vm_cpu_subfunc subfuncs;
/* facility list requested by guest (in dma page) */
__u64 *fac_list;
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 287bb88f7698..2686bee800e3 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -11,6 +11,8 @@
/* I/O size constraints */
#define ZPCI_MAX_READ_SIZE 8
#define ZPCI_MAX_WRITE_SIZE 128
+#define ZPCI_BOUNDARY_SIZE (1 << 12)
+#define ZPCI_BOUNDARY_MASK (ZPCI_BOUNDARY_SIZE - 1)
/* I/O Map */
#define ZPCI_IOMAP_SHIFT 48
@@ -125,16 +127,18 @@ out:
int zpci_write_block(volatile void __iomem *dst, const void *src,
unsigned long len);
-static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
+static inline int zpci_get_max_io_size(u64 src, u64 dst, int len, int max)
{
- int count = len > max ? max : len, size = 1;
+ int offset = dst & ZPCI_BOUNDARY_MASK;
+ int size;
- while (!(src & 0x1) && !(dst & 0x1) && ((size << 1) <= count)) {
- dst = dst >> 1;
- src = src >> 1;
- size = size << 1;
- }
- return size;
+ size = min3(len, ZPCI_BOUNDARY_SIZE - offset, max);
+ if (IS_ALIGNED(src, 8) && IS_ALIGNED(dst, 8) && IS_ALIGNED(size, 8))
+ return size;
+
+ if (size >= 8)
+ return 8;
+ return rounddown_pow_of_two(size);
}
static inline int zpci_memcpy_fromio(void *dst,
@@ -144,9 +148,9 @@ static inline int zpci_memcpy_fromio(void *dst,
int size, rc = 0;
while (n > 0) {
- size = zpci_get_max_write_size((u64 __force) src,
- (u64) dst, n,
- ZPCI_MAX_READ_SIZE);
+ size = zpci_get_max_io_size((u64 __force) src,
+ (u64) dst, n,
+ ZPCI_MAX_READ_SIZE);
rc = zpci_read_single(dst, src, size);
if (rc)
break;
@@ -166,9 +170,9 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
return -EINVAL;
while (n > 0) {
- size = zpci_get_max_write_size((u64 __force) dst,
- (u64) src, n,
- ZPCI_MAX_WRITE_SIZE);
+ size = zpci_get_max_io_size((u64 __force) dst,
+ (u64) src, n,
+ ZPCI_MAX_WRITE_SIZE);
if (size > 8) /* main path */
rc = zpci_write_block(dst, src, size);
else
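
For reference, a user-space sketch of the chunk-size selection implemented by zpci_get_max_io_size() above (the 4K boundary constant mirrors the patch; min3() and rounddown_pow_of_two() are open-coded here for a stand-alone build):

#include <stdint.h>
#include <stdio.h>

#define BOUNDARY_SIZE (1 << 12)
#define BOUNDARY_MASK (BOUNDARY_SIZE - 1)

static int min3i(int a, int b, int c)
{
        int m = a < b ? a : b;
        return m < c ? m : c;
}

static int rounddown_pow_of_two_i(int v)
{
        int p = 1;
        while (p * 2 <= v)
                p *= 2;
        return p;
}

/* Pick a chunk that stays within one 4K boundary of dst; use the whole
 * span when everything is 8-byte aligned, otherwise 8 bytes or the
 * largest power of two that fits. */
static int max_io_size(uint64_t src, uint64_t dst, int len, int max)
{
        int offset = dst & BOUNDARY_MASK;
        int size = min3i(len, BOUNDARY_SIZE - offset, max);

        if (!(src & 7) && !(dst & 7) && !(size & 7))
                return size;
        if (size >= 8)
                return 8;
        return rounddown_pow_of_two_i(size);
}

int main(void)
{
        printf("%d\n", max_io_size(0x1000, 0x2000, 128, 128));  /* 128 */
        printf("%d\n", max_io_size(0x1000, 0x2ffc, 128, 128));  /* 4   */
        return 0;
}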
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 353def93973b..7a562b4199c8 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -41,7 +41,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
extra-y += vmlinux.lds
diff --git a/arch/s390/kernel/facility.c b/arch/s390/kernel/facility.c
new file mode 100644
index 000000000000..f02127219a27
--- /dev/null
+++ b/arch/s390/kernel/facility.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2023
+ */
+
+#include <asm/facility.h>
+
+unsigned int stfle_size(void)
+{
+ static unsigned int size;
+ unsigned int r;
+ u64 dummy;
+
+ r = READ_ONCE(size);
+ if (!r) {
+ r = __stfle_asm(&dummy, 1) + 1;
+ WRITE_ONCE(size, r);
+ }
+ return r;
+}
+EXPORT_SYMBOL(stfle_size);
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 9e7c15fccfea..a4f3449cc814 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -208,7 +208,6 @@ void __load_fpu_regs(void)
}
clear_cpu_flag(CIF_FPU);
}
-EXPORT_SYMBOL(__load_fpu_regs);
void load_fpu_regs(void)
{
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 39a91b00438a..bf8a672b15a4 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -111,11 +111,11 @@ static void paicrypt_event_destroy(struct perf_event *event)
mutex_unlock(&pai_reserve_mutex);
}
-static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel)
+static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
{
if (kernel)
nr += PAI_CRYPTO_MAXCTR;
- return cpump->page[nr];
+ return page[nr];
}
/* Read the counter values. Return value from location in CMP. For event
@@ -129,13 +129,13 @@ static u64 paicrypt_getdata(struct perf_event *event, bool kernel)
int i;
if (event->attr.config != PAI_CRYPTO_BASE) {
- return paicrypt_getctr(cpump,
+ return paicrypt_getctr(cpump->page,
event->attr.config - PAI_CRYPTO_BASE,
kernel);
}
for (i = 1; i <= paicrypt_cnt; i++) {
- u64 val = paicrypt_getctr(cpump, i, kernel);
+ u64 val = paicrypt_getctr(cpump->page, i, kernel);
if (!val)
continue;
@@ -317,10 +317,14 @@ static void paicrypt_start(struct perf_event *event, int flags)
* Events are added, deleted and re-added when 2 or more events
* are active at the same time.
*/
- if (!event->hw.last_tag) {
- event->hw.last_tag = 1;
- sum = paicrypt_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
+ if (!event->attr.sample_period) { /* Counting */
+ if (!event->hw.last_tag) {
+ event->hw.last_tag = 1;
+ sum = paicrypt_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ }
+ } else { /* Sampling */
+ perf_sched_cb_inc(event->pmu);
}
}
@@ -336,19 +340,18 @@ static int paicrypt_add(struct perf_event *event, int flags)
local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
}
cpump->event = event;
- if (flags & PERF_EF_START && !event->attr.sample_period) {
- /* Only counting needs initial counter value */
+ if (flags & PERF_EF_START)
paicrypt_start(event, PERF_EF_RELOAD);
- }
event->hw.state = 0;
- if (event->attr.sample_period)
- perf_sched_cb_inc(event->pmu);
return 0;
}
static void paicrypt_stop(struct perf_event *event, int flags)
{
- paicrypt_read(event);
+ if (!event->attr.sample_period) /* Counting */
+ paicrypt_read(event);
+ else /* Sampling */
+ perf_sched_cb_dec(event->pmu);
event->hw.state = PERF_HES_STOPPED;
}
@@ -357,11 +360,7 @@ static void paicrypt_del(struct perf_event *event, int flags)
struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
struct paicrypt_map *cpump = mp->mapptr;
- if (event->attr.sample_period)
- perf_sched_cb_dec(event->pmu);
- if (!event->attr.sample_period)
- /* Only counting needs to read counter */
- paicrypt_stop(event, PERF_EF_UPDATE);
+ paicrypt_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
WRITE_ONCE(S390_lowcore.ccd, 0);
@@ -373,8 +372,7 @@ static void paicrypt_del(struct perf_event *event, int flags)
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
-static size_t paicrypt_copy(struct pai_userdata *userdata,
- struct paicrypt_map *cpump,
+static size_t paicrypt_copy(struct pai_userdata *userdata, unsigned long *page,
bool exclude_user, bool exclude_kernel)
{
int i, outidx = 0;
@@ -383,9 +381,9 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
u64 val = 0;
if (!exclude_kernel)
- val += paicrypt_getctr(cpump, i, true);
+ val += paicrypt_getctr(page, i, true);
if (!exclude_user)
- val += paicrypt_getctr(cpump, i, false);
+ val += paicrypt_getctr(page, i, false);
if (val) {
userdata[outidx].num = i;
userdata[outidx].value = val;
@@ -395,25 +393,14 @@ static size_t paicrypt_copy(struct pai_userdata *userdata,
return outidx * sizeof(struct pai_userdata);
}
-static int paicrypt_push_sample(void)
+static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
+ struct perf_event *event)
{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
- size_t rawsize;
int overflow;
- if (!cpump->event) /* No event active */
- return 0;
- rawsize = paicrypt_copy(cpump->save, cpump,
- cpump->event->attr.exclude_user,
- cpump->event->attr.exclude_kernel);
- if (!rawsize) /* No incremented counters */
- return 0;
-
/* Setup perf sample */
memset(&regs, 0, sizeof(regs));
memset(&raw, 0, sizeof(raw));
@@ -444,6 +431,25 @@ static int paicrypt_push_sample(void)
return overflow;
}
+/* Check if there is data to be saved on schedule out of a task. */
+static int paicrypt_have_sample(void)
+{
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+ struct perf_event *event = cpump->event;
+ size_t rawsize;
+ int rc = 0;
+
+ if (!event) /* No event active */
+ return 0;
+ rawsize = paicrypt_copy(cpump->save, cpump->page,
+ cpump->event->attr.exclude_user,
+ cpump->event->attr.exclude_kernel);
+ if (rawsize) /* Incremented counters */
+ rc = paicrypt_push_sample(rawsize, cpump, event);
+ return rc;
+}
+
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event CRYPTO_ALL is allowed.
*/
@@ -453,7 +459,7 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sch
* results on schedule_out and if page was dirty, clear values.
*/
if (!sched_in)
- paicrypt_push_sample();
+ paicrypt_have_sample();
}
/* Attribute definitions for paicrypt interface. As with other CPU
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index e7013a2e8960..af7f2b538c8f 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -276,9 +276,9 @@ static int paiext_event_init(struct perf_event *event)
return 0;
}
-static u64 paiext_getctr(struct paiext_map *cpump, int nr)
+static u64 paiext_getctr(unsigned long *area, int nr)
{
- return cpump->area[nr];
+ return area[nr];
}
/* Read the counter values. Return value from location in buffer. For event
@@ -292,10 +292,11 @@ static u64 paiext_getdata(struct perf_event *event)
int i;
if (event->attr.config != PAI_NNPA_BASE)
- return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
+ return paiext_getctr(cpump->area,
+ event->attr.config - PAI_NNPA_BASE);
for (i = 1; i <= paiext_cnt; i++)
- sum += paiext_getctr(cpump, i);
+ sum += paiext_getctr(cpump->area, i);
return sum;
}
@@ -320,11 +321,15 @@ static void paiext_start(struct perf_event *event, int flags)
{
u64 sum;
- if (event->hw.last_tag)
- return;
- event->hw.last_tag = 1;
- sum = paiext_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
+ if (!event->attr.sample_period) { /* Counting */
+ if (!event->hw.last_tag) {
+ event->hw.last_tag = 1;
+ sum = paiext_getall(event); /* Get current value */
+ local64_set(&event->hw.prev_count, sum);
+ }
+ } else { /* Sampling */
+ perf_sched_cb_inc(event->pmu);
+ }
}
static int paiext_add(struct perf_event *event, int flags)
@@ -341,21 +346,19 @@ static int paiext_add(struct perf_event *event, int flags)
debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
__func__, S390_lowcore.aicd, pcb->acc);
}
- if (flags & PERF_EF_START && !event->attr.sample_period) {
- /* Only counting needs initial counter value */
+ cpump->event = event;
+ if (flags & PERF_EF_START)
paiext_start(event, PERF_EF_RELOAD);
- }
event->hw.state = 0;
- if (event->attr.sample_period) {
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
- }
return 0;
}
static void paiext_stop(struct perf_event *event, int flags)
{
- paiext_read(event);
+ if (!event->attr.sample_period) /* Counting */
+ paiext_read(event);
+ else /* Sampling */
+ perf_sched_cb_dec(event->pmu);
event->hw.state = PERF_HES_STOPPED;
}
@@ -365,12 +368,7 @@ static void paiext_del(struct perf_event *event, int flags)
struct paiext_map *cpump = mp->mapptr;
struct paiext_cb *pcb = cpump->paiext_cb;
- if (event->attr.sample_period)
- perf_sched_cb_dec(event->pmu);
- if (!event->attr.sample_period) {
- /* Only counting needs to read counter */
- paiext_stop(event, PERF_EF_UPDATE);
- }
+ paiext_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
/* Disable CPU instruction lookup for PAIE1 control block */
local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
@@ -386,13 +384,12 @@ static void paiext_del(struct perf_event *event, int flags)
* 2 bytes: Number of counter
* 8 bytes: Value of counter
*/
-static size_t paiext_copy(struct paiext_map *cpump)
+static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area)
{
- struct pai_userdata *userdata = cpump->save;
int i, outidx = 0;
for (i = 1; i <= paiext_cnt; i++) {
- u64 val = paiext_getctr(cpump, i);
+ u64 val = paiext_getctr(area, i);
if (val) {
userdata[outidx].num = i;
@@ -418,21 +415,14 @@ static size_t paiext_copy(struct paiext_map *cpump)
* sched_task() callback. That callback is not active after paiext_del()
* returns and has deleted the event on that CPU.
*/
-static int paiext_push_sample(void)
+static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
+ struct perf_event *event)
{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
- size_t rawsize;
int overflow;
- rawsize = paiext_copy(cpump);
- if (!rawsize) /* No incremented counters */
- return 0;
-
/* Setup perf sample */
memset(&regs, 0, sizeof(regs));
memset(&raw, 0, sizeof(raw));
@@ -461,6 +451,23 @@ static int paiext_push_sample(void)
return overflow;
}
+/* Check if there is data to be saved on schedule out of a task. */
+static int paiext_have_sample(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event = cpump->event;
+ size_t rawsize;
+ int rc = 0;
+
+ if (!event) /* No event active */
+ return 0;
+ rawsize = paiext_copy(cpump->save, cpump->area);
+ if (rawsize) /* Incremented counters */
+ rc = paiext_push_sample(rawsize, cpump, event);
+ return rc;
+}
+
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event NNPA_ALL is allowed.
*/
@@ -470,7 +477,7 @@ static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched
* results on schedule_out and if page was dirty, clear values.
*/
if (!sched_in)
- paiext_push_sample();
+ paiext_have_sample();
}
/* Attribute definitions for pai extension1 interface. As with other CPU
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 2e6754b62b20..f1897a8bb221 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -917,7 +917,6 @@ static int s390_fpregs_set(struct task_struct *target,
else
memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs));
- /* If setting FPC, must validate it first. */
if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 45fdf2a9b2e3..72e9b7dcdf7d 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,19 +20,16 @@ config KVM
def_tristate y
prompt "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
- select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_VCPU_ASYNC_IOCTL
- select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
+ select KVM_COMMON
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_INVALID_WAKEUPS
select HAVE_KVM_NO_POLL
select KVM_VFIO
- select INTERVAL_TREE
select MMU_NOTIFIER
help
Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 3765c4223bf9..80879fc73c90 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -213,8 +213,8 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
return -EINVAL;
- bp_data = memdup_user(dbg->arch.hw_bp,
- sizeof(*bp_data) * dbg->arch.nr_hw_bp);
+ bp_data = memdup_array_user(dbg->arch.hw_bp, dbg->arch.nr_hw_bp,
+ sizeof(*bp_data));
if (IS_ERR(bp_data))
return PTR_ERR(bp_data);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index acc81ca6492e..ea63ac769889 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -563,7 +563,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 8207a892bbe2..fef42e2a80a2 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -19,6 +19,7 @@
#include <asm/nmi.h>
#include <asm/dis.h>
#include <asm/fpu/api.h>
+#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -984,12 +985,26 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
+ __u32 fac = READ_ONCE(vsie_page->scb_o->fac);
+ /*
+ * Alternate-STFLE-Interpretive-Execution facilities are not supported
+ * -> format-0 flcb
+ */
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
retry_vsie_icpt(vsie_page);
+ /*
+ * The facility list origin (FLO) is in bits 1 - 28 of the FLD
+ * so we need to mask here before reading.
+ */
+ fac = fac & 0x7ffffff8U;
+ /*
+ * format-0 -> size of nested guest's facility list == guest's size
+ * guest's size == host's size, since STFLE is interpretatively executed
+ * using a format-0 for the guest, too.
+ */
if (read_guest_real(vcpu, fac, &vsie_page->fac,
- sizeof(vsie_page->fac)))
+ stfle_size() * sizeof(u64)))
return set_validity_icpt(scb_s, 0x1090U);
scb_s->fac = (__u32)(__u64) &vsie_page->fac;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ab4098886e56..ac4c78546d97 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -280,7 +280,6 @@ static void do_sigbus(struct pt_regs *regs)
static void do_exception(struct pt_regs *regs, int access)
{
struct vm_area_struct *vma;
- struct task_struct *tsk;
unsigned long address;
struct mm_struct *mm;
enum fault_type type;
@@ -289,7 +288,6 @@ static void do_exception(struct pt_regs *regs, int access)
vm_fault_t fault;
bool is_write;
- tsk = current;
/*
* The instruction that caused the program check has
* been nullified. Don't signal single step via SIGTRAP.
@@ -297,7 +295,7 @@ static void do_exception(struct pt_regs *regs, int access)
clear_thread_flag(TIF_PER_TRAP);
if (kprobe_page_fault(regs, 14))
return;
- mm = tsk->mm;
+ mm = current->mm;
address = get_fault_address(regs);
is_write = fault_is_write(regs);
type = get_fault_type(regs);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 588089332931..a90499c087f0 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -97,9 +97,9 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
return -EINVAL;
while (n > 0) {
- size = zpci_get_max_write_size((u64 __force) dst,
- (u64 __force) src, n,
- ZPCI_MAX_WRITE_SIZE);
+ size = zpci_get_max_io_size((u64 __force) dst,
+ (u64 __force) src, n,
+ ZPCI_MAX_WRITE_SIZE);
if (size > 8) /* main path */
rc = __pcistb_mio_inuser(dst, src, size, &status);
else
@@ -242,9 +242,9 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
u8 status;
while (n > 0) {
- size = zpci_get_max_write_size((u64 __force) src,
- (u64 __force) dst, n,
- ZPCI_MAX_READ_SIZE);
+ size = zpci_get_max_io_size((u64 __force) src,
+ (u64 __force) dst, n,
+ ZPCI_MAX_READ_SIZE);
rc = __pcilg_mio_inuser(dst, src, size, &status);
if (rc)
break;
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 0f279360838a..30d117f9ad7e 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1220,7 +1220,7 @@ static int __init arch_setup(void)
lcdc_info.ch[0].num_modes = ARRAY_SIZE(ecovec_dvi_modes);
/* No backlight */
- gpio_backlight_data.fbdev = NULL;
+ gpio_backlight_data.dev = NULL;
gpio_set_value(GPIO_PTA2, 1);
gpio_set_value(GPIO_PTU1, 1);
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index cf59b98446e4..7b427c17fbfe 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -171,7 +171,8 @@ CONFIG_BTRFS_FS=y
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=y
CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 878b6b551bd2..51112f54552b 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -90,6 +90,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma,
unsigned long len);
#define flush_cache_vmap(start, end) local_flush_cache_all(NULL)
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) local_flush_cache_all(NULL)
#define flush_dcache_mmap_lock(mapping) do { } while (0)
diff --git a/arch/sh/kernel/vsyscall/Makefile b/arch/sh/kernel/vsyscall/Makefile
index 6e8664448048..118744d349e2 100644
--- a/arch/sh/kernel/vsyscall/Makefile
+++ b/arch/sh/kernel/vsyscall/Makefile
@@ -1,11 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
obj-y += vsyscall.o vsyscall-syscall.o vsyscall-syms.o
-$(obj)/vsyscall-syscall.o: \
- $(foreach F,trapa,$(obj)/vsyscall-$F.so)
+$(obj)/vsyscall-syscall.o: $(obj)/vsyscall-trapa.so
# Teach kbuild about targets
-targets += $(foreach F,trapa,vsyscall-$F.o vsyscall-$F.so)
+targets += vsyscall-trapa.o vsyscall-trapa.so
targets += vsyscall-note.o vsyscall.lds vsyscall-dummy.o
# The DSO images are built using a special linker script
diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h
index f3b7270bf71b..9fee0ccfccb8 100644
--- a/arch/sparc/include/asm/cacheflush_32.h
+++ b/arch/sparc/include/asm/cacheflush_32.h
@@ -48,6 +48,7 @@ static inline void flush_dcache_page(struct page *page)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_cache_vmap(start, end) flush_cache_all()
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) flush_cache_all()
/* When a context switch happens we must flush all user windows so that
diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h
index 0e879004efff..2b1261b77ecd 100644
--- a/arch/sparc/include/asm/cacheflush_64.h
+++ b/arch/sparc/include/asm/cacheflush_64.h
@@ -75,6 +75,7 @@ void flush_ptrace_access(struct vm_area_struct *, struct page *,
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_cache_vmap(start, end) do { } while (0)
+#define flush_cache_vmap_early(start, end) do { } while (0)
#define flush_cache_vunmap(start, end) do { } while (0)
#endif /* !__ASSEMBLY__ */
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 3c38ca40a22b..a84598568300 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -13,7 +13,10 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
#include <asm/apb.h>
#include <asm/iommu.h>
@@ -456,7 +459,6 @@ static void sabre_pbm_init(struct pci_pbm_info *pbm,
static const struct of_device_id sabre_match[];
static int sabre_probe(struct platform_device *op)
{
- const struct of_device_id *match;
const struct linux_prom64_registers *pr_regs;
struct device_node *dp = op->dev.of_node;
struct pci_pbm_info *pbm;
@@ -466,8 +468,7 @@ static int sabre_probe(struct platform_device *op)
const u32 *vdma;
u64 clear_irq;
- match = of_match_device(sabre_match, &op->dev);
- hummingbird_p = match && (match->data != NULL);
+ hummingbird_p = (uintptr_t)device_get_match_data(&op->dev);
if (!hummingbird_p) {
struct device_node *cpu_dp;
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index 23b47f7fdb1d..5d8dd4949586 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -11,7 +11,10 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/interrupt.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/numa.h>
#include <asm/iommu.h>
@@ -1459,15 +1462,13 @@ out_err:
return err;
}
-static const struct of_device_id schizo_match[];
static int schizo_probe(struct platform_device *op)
{
- const struct of_device_id *match;
+ unsigned long chip_type = (unsigned long)device_get_match_data(&op->dev);
- match = of_match_device(schizo_match, &op->dev);
- if (!match)
+ if (!chip_type)
return -EINVAL;
- return __schizo_init(op, (unsigned long)match->data);
+ return __schizo_init(op, chip_type);
}
/* The ordering of this table is very important. Some Tomatillo
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index d08c3a0443f3..7f5eedf1f5e0 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -3,9 +3,6 @@
# Building vDSO images for sparc.
#
-VDSO64-$(CONFIG_SPARC64) := y
-VDSOCOMPAT-$(CONFIG_COMPAT) := y
-
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o
@@ -13,22 +10,15 @@ vobjs-y := vdso-note.o vclock_gettime.o
obj-y += vma.o
# vDSO images to build
-vdso_img-$(VDSO64-y) += 64
-vdso_img-$(VDSOCOMPAT-y) += 32
+obj-$(CONFIG_SPARC64) += vdso-image-64.o
+obj-$(CONFIG_COMPAT) += vdso-image-32.o
-vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+vobjs := $(addprefix $(obj)/, $(vobjs-y))
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.lds $(vobjs-y)
-
-# Build the vDSO image C files and link them in.
-vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
-vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
-vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
-obj-y += $(vdso_img_objs)
-targets += $(vdso_img_cfiles)
-targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
+targets += $(foreach x, 32 64, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg)
CPPFLAGS_vdso.lds += -P -C
diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas
index ac35de5316a6..67323b028999 100644
--- a/arch/um/Makefile-skas
+++ b/arch/um/Makefile-skas
@@ -4,7 +4,12 @@
#
GPROF_OPT += -pg
+
+ifdef CONFIG_CC_IS_CLANG
+GCOV_OPT += -fprofile-instr-generate -fcoverage-mapping
+else
GCOV_OPT += -fprofile-arcs -ftest-coverage
+endif
CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT)
CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT)
diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h
index 3fec3b8406e9..e14b9cdf7a33 100644
--- a/arch/um/drivers/chan.h
+++ b/arch/um/drivers/chan.h
@@ -30,7 +30,7 @@ struct chan {
extern void chan_interrupt(struct line *line, int irq);
extern int parse_chan_pair(char *str, struct line *line, int device,
const struct chan_opts *opts, char **error_out);
-extern int write_chan(struct chan *chan, const char *buf, int len,
+extern int write_chan(struct chan *chan, const u8 *buf, size_t len,
int write_irq);
extern int console_write_chan(struct chan *chan, const char *buf,
int len);
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 26a702a06515..37538b4168da 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -33,14 +33,14 @@ static void not_configged_close(int fd, void *data)
"UML\n");
}
-static int not_configged_read(int fd, char *c_out, void *data)
+static int not_configged_read(int fd, u8 *c_out, void *data)
{
printk(KERN_ERR "Using a channel type which is configured out of "
"UML\n");
return -EIO;
}
-static int not_configged_write(int fd, const char *buf, int len, void *data)
+static int not_configged_write(int fd, const u8 *buf, size_t len, void *data)
{
printk(KERN_ERR "Using a channel type which is configured out of "
"UML\n");
@@ -247,8 +247,7 @@ void deactivate_chan(struct chan *chan, int irq)
deactivate_fd(chan->fd, irq);
}
-int write_chan(struct chan *chan, const char *buf, int len,
- int write_irq)
+int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
{
int n, ret = 0;
@@ -540,7 +539,7 @@ void chan_interrupt(struct line *line, int irq)
struct tty_port *port = &line->port;
struct chan *chan = line->chan_in;
int err;
- char c;
+ u8 c;
if (!chan || !chan->ops->read)
goto out;
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index 25727ed648b7..ec04e47b9d79 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -19,7 +19,7 @@ void generic_close(int fd, void *unused)
close(fd);
}
-int generic_read(int fd, char *c_out, void *unused)
+int generic_read(int fd, __u8 *c_out, void *unused)
{
int n;
@@ -35,7 +35,7 @@ int generic_read(int fd, char *c_out, void *unused)
/* XXX Trivial wrapper around write */
-int generic_write(int fd, const char *buf, int n, void *unused)
+int generic_write(int fd, const __u8 *buf, size_t n, void *unused)
{
int err;
@@ -141,7 +141,7 @@ struct winch_data {
int pipe_fd;
};
-static int winch_thread(void *arg)
+static __noreturn int winch_thread(void *arg)
{
struct winch_data *data = arg;
sigset_t sigs;
@@ -153,8 +153,8 @@ static int winch_thread(void *arg)
pipe_fd = data->pipe_fd;
count = write(pipe_fd, &c, sizeof(c));
if (count != sizeof(c))
- printk(UM_KERN_ERR "winch_thread : failed to write "
- "synchronization byte, err = %d\n", -count);
+ os_info("winch_thread : failed to write synchronization byte, err = %d\n",
+ -count);
/*
* We are not using SIG_IGN on purpose, so don't fix it as I thought to
@@ -166,29 +166,29 @@ static int winch_thread(void *arg)
sigfillset(&sigs);
/* Block all signals possible. */
if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) {
- printk(UM_KERN_ERR "winch_thread : sigprocmask failed, "
- "errno = %d\n", errno);
- exit(1);
+ os_info("winch_thread : sigprocmask failed, errno = %d\n",
+ errno);
+ goto wait_kill;
}
/* In sigsuspend(), block anything else than SIGWINCH. */
sigdelset(&sigs, SIGWINCH);
if (setsid() < 0) {
- printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n",
+ os_info("winch_thread : setsid failed, errno = %d\n",
errno);
- exit(1);
+ goto wait_kill;
}
if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) {
- printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on "
- "fd %d err = %d\n", pty_fd, errno);
- exit(1);
+ os_info("winch_thread : TIOCSCTTY failed on "
+ "fd %d err = %d\n", pty_fd, errno);
+ goto wait_kill;
}
if (tcsetpgrp(pty_fd, os_getpid()) < 0) {
- printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on "
- "fd %d err = %d\n", pty_fd, errno);
- exit(1);
+ os_info("winch_thread : tcsetpgrp failed on fd %d err = %d\n",
+ pty_fd, errno);
+ goto wait_kill;
}
/*
@@ -199,8 +199,8 @@ static int winch_thread(void *arg)
*/
count = read(pipe_fd, &c, sizeof(c));
if (count != sizeof(c))
- printk(UM_KERN_ERR "winch_thread : failed to read "
- "synchronization byte, err = %d\n", errno);
+ os_info("winch_thread : failed to read synchronization byte, err = %d\n",
+ errno);
while(1) {
/*
@@ -211,9 +211,15 @@ static int winch_thread(void *arg)
count = write(pipe_fd, &c, sizeof(c));
if (count != sizeof(c))
- printk(UM_KERN_ERR "winch_thread : write failed, "
- "err = %d\n", errno);
+ os_info("winch_thread : write failed, err = %d\n",
+ errno);
}
+
+wait_kill:
+ c = 2;
+ count = write(pipe_fd, &c, sizeof(c));
+ while (1)
+ pause();
}
static int winch_tramp(int fd, struct tty_port *port, int *fd_out,
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
index 4e51b85e2a23..e158e16fb3cc 100644
--- a/arch/um/drivers/chan_user.h
+++ b/arch/um/drivers/chan_user.h
@@ -7,6 +7,7 @@
#define __CHAN_USER_H__
#include <init.h>
+#include <linux/types.h>
struct chan_opts {
void (*const announce)(char *dev_name, int dev);
@@ -19,8 +20,8 @@ struct chan_ops {
void *(*init)(char *, int, const struct chan_opts *);
int (*open)(int, int, int, void *, char **);
void (*close)(int, void *);
- int (*read)(int, char *, void *);
- int (*write)(int, const char *, int, void *);
+ int (*read)(int, __u8 *, void *);
+ int (*write)(int, const __u8 *, size_t, void *);
int (*console_write)(int, const char *, int);
int (*window_size)(int, void *, unsigned short *, unsigned short *);
void (*free)(void *);
@@ -31,8 +32,8 @@ extern const struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops,
tty_ops, xterm_ops;
extern void generic_close(int fd, void *unused);
-extern int generic_read(int fd, char *c_out, void *unused);
-extern int generic_write(int fd, const char *buf, int n, void *unused);
+extern int generic_read(int fd, __u8 *c_out, void *unused);
+extern int generic_write(int fd, const __u8 *buf, size_t n, void *unused);
extern int generic_console_write(int fd, const char *buf, int n);
extern int generic_window_size(int fd, void *unused, unsigned short *rows_out,
unsigned short *cols_out);
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index b98545f3edb5..ffc5cb92fa36 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -83,7 +83,7 @@ unsigned int line_chars_in_buffer(struct tty_struct *tty)
*
* Must be called while holding line->lock!
*/
-static int buffer_data(struct line *line, const char *buf, int len)
+static int buffer_data(struct line *line, const u8 *buf, size_t len)
{
int end, room;
@@ -629,15 +629,18 @@ static irqreturn_t winch_interrupt(int irq, void *data)
if (fd != -1) {
err = generic_read(fd, &c, NULL);
- if (err < 0) {
+ /* A read of 2 means the winch thread failed and has warned */
+ if (err < 0 || (err == 1 && c == 2)) {
if (err != -EAGAIN) {
winch->fd = -1;
list_del(&winch->list);
os_close_file(fd);
- printk(KERN_ERR "winch_interrupt : "
- "read failed, errno = %d\n", -err);
- printk(KERN_ERR "fd %d is losing SIGWINCH "
- "support\n", winch->tty_fd);
+ if (err < 0) {
+ printk(KERN_ERR "winch_interrupt : read failed, errno = %d\n",
+ -err);
+ printk(KERN_ERR "fd %d is losing SIGWINCH support\n",
+ winch->tty_fd);
+ }
INIT_WORK(&winch->work, __free_winch);
schedule_work(&winch->work);
return IRQ_HANDLED;
diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
index e84fb9b4165e..e8bd6f3dfb50 100644
--- a/arch/um/drivers/line.h
+++ b/arch/um/drivers/line.h
@@ -47,9 +47,9 @@ struct line {
*
* buffer points to a buffer allocated on demand, of length
* LINE_BUFSIZE, head to the start of the ring, tail to the end.*/
- char *buffer;
- char *head;
- char *tail;
+ u8 *buffer;
+ u8 *head;
+ u8 *tail;
int sigio;
struct delayed_work task;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 3d7836c46507..cabcc501b448 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -204,7 +204,7 @@ static int uml_net_close(struct net_device *dev)
return 0;
}
-static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct uml_net_private *lp = netdev_priv(dev);
unsigned long flags;
diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c
index 87087763a417..30d59b8481b4 100644
--- a/arch/um/drivers/null.c
+++ b/arch/um/drivers/null.c
@@ -28,7 +28,7 @@ static int null_open(int input, int output, int primary, void *d,
return (fd < 0) ? -errno : fd;
}
-static int null_read(int fd, char *c_out, void *unused)
+static int null_read(int fd, __u8 *c_out, void *unused)
{
return -ENODEV;
}
diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index ffe2ee8a0246..97a37c062997 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -971,7 +971,7 @@ static long um_pci_map_platform(unsigned long offset, size_t size,
*ops = &um_pci_device_bar_ops;
*priv = &um_pci_platform_device->resptr[0];
- return 0;
+ return offset;
}
static const struct logic_iomem_region_ops um_pci_platform_ops = {
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index 5b072aba5b65..a7555e43ed14 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -12,7 +12,6 @@
typedef struct mm_context {
struct mm_id id;
struct uml_arch_mm_context arch;
- struct page *stub_pages[2];
} mm_context_t;
extern void __switch_mm(struct mm_id * mm_idp);
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 7414154b8e9a..6c3779541845 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -22,7 +22,6 @@ struct mm_struct;
struct thread_struct {
struct pt_regs regs;
struct pt_regs *segv_regs;
- int singlestep_syscall;
void *fault_addr;
jmp_buf *fault_catcher;
struct task_struct *prev_sched;
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index d8b8b4f07e42..789b83013f35 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -34,7 +34,6 @@ extern int handle_page_fault(unsigned long address, unsigned long ip,
extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
extern void initial_thread_cb(void (*proc)(void *), void *arg);
-extern int is_syscall(unsigned long addr);
extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
@@ -50,7 +49,7 @@ extern void do_uml_exitcalls(void);
* Are we disallowed to sleep? Used to choose between GFP_KERNEL and
* GFP_ATOMIC.
*/
-extern int __cant_sleep(void);
+extern int __uml_cant_sleep(void);
extern int get_current_pid(void);
extern int copy_from_user_proc(void *to, void *from, int size);
extern char *uml_strdup(const char *string);
@@ -58,7 +57,7 @@ extern char *uml_strdup(const char *string);
extern unsigned long to_irq_stack(unsigned long *mask_out);
extern unsigned long from_irq_stack(int nested);
-extern int singlestepping(void *t);
+extern int singlestepping(void);
extern void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 0df646c6651e..aff8906304ea 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -323,9 +323,6 @@ extern void sigio_broken(int fd);
extern int __add_sigio_fd(int fd);
extern int __ignore_sigio_fd(int fd);
-/* prctl.c */
-extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
-
/* tty.c */
extern int get_pty(void);
diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h
index 95455e8996e7..8a705d8f96ce 100644
--- a/arch/um/include/shared/ptrace_user.h
+++ b/arch/um/include/shared/ptrace_user.h
@@ -12,45 +12,4 @@
extern int ptrace_getregs(long pid, unsigned long *regs_out);
extern int ptrace_setregs(long pid, unsigned long *regs_in);
-/* syscall emulation path in ptrace */
-
-#ifndef PTRACE_SYSEMU
-#define PTRACE_SYSEMU 31
-#endif
-#ifndef PTRACE_SYSEMU_SINGLESTEP
-#define PTRACE_SYSEMU_SINGLESTEP 32
-#endif
-
-/* On architectures, that started to support PTRACE_O_TRACESYSGOOD
- * in linux 2.4, there are two different definitions of
- * PTRACE_SETOPTIONS: linux 2.4 uses 21 while linux 2.6 uses 0x4200.
- * For binary compatibility, 2.6 also supports the old "21", named
- * PTRACE_OLDSETOPTION. On these architectures, UML always must use
- * "21", to ensure the kernel runs on 2.4 and 2.6 host without
- * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML.
- * We also want to be able to build the kernel on 2.4, which doesn't
- * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare
- * PTRACE_OLDSETOPTIONS to be the same as PTRACE_SETOPTIONS.
- *
- * On architectures, that start to support PTRACE_O_TRACESYSGOOD on
- * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't
- * supported by the host kernel. In that case, our trick lets us use
- * the new 0x4200 with the name PTRACE_OLDSETOPTIONS.
- */
-#ifndef PTRACE_OLDSETOPTIONS
-#define PTRACE_OLDSETOPTIONS PTRACE_SETOPTIONS
-#endif
-
-void set_using_sysemu(int value);
-int get_using_sysemu(void);
-extern int sysemu_supported;
-
-#define SELECT_PTRACE_OPERATION(sysemu_mode, singlestep_mode) \
- (((int[3][3] ) { \
- { PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \
- { PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \
- { PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \
- PTRACE_SYSEMU_SINGLESTEP } }) \
- [sysemu_mode][singlestep_mode])
-
#endif
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index 2f9c3ce5b45e..a0450326521c 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -14,8 +14,6 @@ extern int save_fp_registers(int pid, unsigned long *fp_regs);
extern int restore_fp_registers(int pid, unsigned long *fp_regs);
extern int save_fpx_registers(int pid, unsigned long *fp_regs);
extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
-extern int save_registers(int pid, struct uml_pt_regs *regs);
-extern int restore_pid_registers(int pid, struct uml_pt_regs *regs);
extern int init_pid_registers(int pid);
extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
extern int get_fp_registers(int pid, unsigned long *regs);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 106b7da2f8d6..ab95648e93e1 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -220,7 +220,7 @@ void arch_cpu_idle(void)
um_idle_sleep();
}
-int __cant_sleep(void) {
+int __uml_cant_sleep(void) {
return in_atomic() || irqs_disabled() || in_interrupt();
/* Is in_interrupt() really needed? */
}
@@ -332,17 +332,9 @@ int __init make_proc_sysemu(void)
late_initcall(make_proc_sysemu);
-int singlestepping(void * t)
+int singlestepping(void)
{
- struct task_struct *task = t ? t : current;
-
- if (!test_thread_flag(TIF_SINGLESTEP))
- return 0;
-
- if (task->thread.singlestep_syscall)
- return 1;
-
- return 2;
+ return test_thread_flag(TIF_SINGLESTEP);
}
/*
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 5154b27de580..6600a2782796 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -12,7 +12,6 @@
void user_enable_single_step(struct task_struct *child)
{
set_tsk_thread_flag(child, TIF_SINGLESTEP);
- child->thread.singlestep_syscall = 0;
#ifdef SUBARCH_SET_SINGLESTEPPING
SUBARCH_SET_SINGLESTEPPING(child, 1);
@@ -22,7 +21,6 @@ void user_enable_single_step(struct task_struct *child)
void user_disable_single_step(struct task_struct *child)
{
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
- child->thread.singlestep_syscall = 0;
#ifdef SUBARCH_SET_SINGLESTEPPING
SUBARCH_SET_SINGLESTEPPING(child, 0);
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index ae4658f576ab..a56b44522766 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -121,18 +121,6 @@ void do_signal(struct pt_regs *regs)
}
/*
- * This closes a way to execute a system call on the host. If
- * you set a breakpoint on a system call instruction and singlestep
- * from it, the tracing thread used to PTRACE_SINGLESTEP the process
- * rather than PTRACE_SYSCALL it, allowing the system call to execute
- * on the host. The tracing thread will check this flag and
- * PTRACE_SYSCALL if necessary.
- */
- if (test_thread_flag(TIF_SINGLESTEP))
- current->thread.singlestep_syscall =
- is_syscall(PT_REGS_IP(&current->thread.regs));
-
- /*
* if there's no signal to deliver, we just put the saved sigmask
* back
*/
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index aaee96f07172..198269e384c4 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -236,7 +236,9 @@ EXPORT_SYMBOL(strnlen_user);
* argument and comparison of the previous
* futex value with another constant.
*
- * @encoded_op: encoded operation to execute
+ * @op: operation to execute
+ * @oparg: argument to operation
+ * @oval: old value at uaddr
* @uaddr: pointer to user space address
*
* Return:
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index fddd1dec27e6..3e270da6b6f6 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -432,9 +432,29 @@ static void time_travel_update_time(unsigned long long next, bool idle)
time_travel_del_event(&ne);
}
+static void time_travel_update_time_rel(unsigned long long offs)
+{
+ unsigned long flags;
+
+ /*
+ * Disable interrupts before calculating the new time so
+ * that a real timer interrupt (signal) can't happen at
+ * a bad time e.g. after we read time_travel_time but
+ * before we've completed updating the time.
+ */
+ local_irq_save(flags);
+ time_travel_update_time(time_travel_time + offs, false);
+ local_irq_restore(flags);
+}
+
void time_travel_ndelay(unsigned long nsec)
{
- time_travel_update_time(time_travel_time + nsec, false);
+ /*
+ * It is not strictly necessary to use the _rel() version since
+ * this is only used in INFCPU/EXT modes, but it doesn't hurt
+ * and is more readable too.
+ */
+ time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);
@@ -568,7 +588,11 @@ static void time_travel_set_start(void)
#define time_travel_time 0
#define time_travel_ext_waiting 0
-static inline void time_travel_update_time(unsigned long long ns, bool retearly)
+static inline void time_travel_update_time(unsigned long long ns, bool idle)
+{
+}
+
+static inline void time_travel_update_time_rel(unsigned long long offs)
{
}
@@ -720,9 +744,7 @@ static u64 timer_read(struct clocksource *cs)
*/
if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
!time_travel_ext_waiting)
- time_travel_update_time(time_travel_time +
- TIMER_MULTIPLIER,
- false);
+ time_travel_update_time_rel(TIMER_MULTIPLIER);
return time_travel_time / TIMER_MULTIPLIER;
}
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index b459745f52e2..3cb8ac63be6e 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -46,7 +46,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
unsigned long stack, sp;
int pid, fds[2], ret, n;
- stack = alloc_stack(0, __cant_sleep());
+ stack = alloc_stack(0, __uml_cant_sleep());
if (stack == 0)
return -ENOMEM;
@@ -70,7 +70,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
data.pre_data = pre_data;
data.argv = argv;
data.fd = fds[1];
- data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
+ data.buf = __uml_cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) :
uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
pid = clone(helper_child, (void *) sp, CLONE_VM, &data);
if (pid < 0) {
@@ -121,7 +121,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
unsigned long stack, sp;
int pid, status, err;
- stack = alloc_stack(0, __cant_sleep());
+ stack = alloc_stack(0, __uml_cant_sleep());
if (stack == 0)
return -ENOMEM;
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index b123955be7ac..bd80b921add0 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -11,26 +11,6 @@
#include <sysdep/ptrace_user.h>
#include <registers.h>
-int save_registers(int pid, struct uml_pt_regs *regs)
-{
- int err;
-
- err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp);
- if (err < 0)
- return -errno;
- return 0;
-}
-
-int restore_pid_registers(int pid, struct uml_pt_regs *regs)
-{
- int err;
-
- err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp);
- if (err < 0)
- return -errno;
- return 0;
-}
-
/* This is set once at boot time and not changed thereafter */
static unsigned long exec_regs[MAX_REG_NR];
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 9464833e741a..1f5c3f2523d1 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -177,48 +177,11 @@ static void handle_segv(int pid, struct uml_pt_regs *regs, unsigned long *aux_fp
segv(regs->faultinfo, 0, 1, NULL);
}
-/*
- * To use the same value of using_sysemu as the caller, ask it that value
- * (in local_using_sysemu
- */
-static void handle_trap(int pid, struct uml_pt_regs *regs,
- int local_using_sysemu)
+static void handle_trap(int pid, struct uml_pt_regs *regs)
{
- int err, status;
-
if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
fatal_sigsegv();
- if (!local_using_sysemu)
- {
- err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET,
- __NR_getpid);
- if (err < 0) {
- printk(UM_KERN_ERR "%s - nullifying syscall failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
-
- err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
- if (err < 0) {
- printk(UM_KERN_ERR "%s - continuing to end of syscall failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
-
- CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
- if ((err < 0) || !WIFSTOPPED(status) ||
- (WSTOPSIG(status) != SIGTRAP + 0x80)) {
- err = ptrace_dump_regs(pid);
- if (err)
- printk(UM_KERN_ERR "Failed to get registers from process, errno = %d\n",
- -err);
- printk(UM_KERN_ERR "%s - failed to wait at end of syscall, errno = %d, status = %d\n",
- __func__, errno, status);
- fatal_sigsegv();
- }
- }
-
handle_syscall(regs);
}
@@ -226,7 +189,7 @@ extern char __syscall_stub_start[];
/**
* userspace_tramp() - userspace trampoline
- * @stack: pointer to the new userspace stack page, can be NULL, if? FIXME:
+ * @stack: pointer to the new userspace stack page
*
* The userspace trampoline is used to setup a new userspace process in start_userspace() after it was clone()'ed.
* This function will run on a temporary stack page.
@@ -241,9 +204,13 @@ extern char __syscall_stub_start[];
*/
static int userspace_tramp(void *stack)
{
+ struct sigaction sa;
void *addr;
int fd;
unsigned long long offset;
+ unsigned long segv_handler = STUB_CODE +
+ (unsigned long) stub_segv_handler -
+ (unsigned long) __syscall_stub_start;
ptrace(PTRACE_TRACEME, 0, 0, 0);
@@ -254,39 +221,30 @@ static int userspace_tramp(void *stack)
addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
if (addr == MAP_FAILED) {
- printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, errno = %d\n",
- STUB_CODE, errno);
+ os_info("mapping mmap stub at 0x%lx failed, errno = %d\n",
+ STUB_CODE, errno);
exit(1);
}
- if (stack != NULL) {
- fd = phys_mapping(uml_to_phys(stack), &offset);
- addr = mmap((void *) STUB_DATA,
- STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_SHARED, fd, offset);
- if (addr == MAP_FAILED) {
- printk(UM_KERN_ERR "mapping segfault stack at 0x%lx failed, errno = %d\n",
- STUB_DATA, errno);
- exit(1);
- }
+ fd = phys_mapping(uml_to_phys(stack), &offset);
+ addr = mmap((void *) STUB_DATA,
+ STUB_DATA_PAGES * UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_SHARED, fd, offset);
+ if (addr == MAP_FAILED) {
+ os_info("mapping segfault stack at 0x%lx failed, errno = %d\n",
+ STUB_DATA, errno);
+ exit(1);
}
- if (stack != NULL) {
- struct sigaction sa;
-
- unsigned long v = STUB_CODE +
- (unsigned long) stub_segv_handler -
- (unsigned long) __syscall_stub_start;
-
- set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
- sa.sa_sigaction = (void *) v;
- sa.sa_restorer = NULL;
- if (sigaction(SIGSEGV, &sa, NULL) < 0) {
- printk(UM_KERN_ERR "%s - setting SIGSEGV handler failed - errno = %d\n",
- __func__, errno);
- exit(1);
- }
+
+ set_sigstack((void *) STUB_DATA, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+ sa.sa_sigaction = (void *) segv_handler;
+ sa.sa_restorer = NULL;
+ if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+ os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
+ __func__, errno);
+ exit(1);
}
kill(os_getpid(), SIGSTOP);
@@ -298,7 +256,7 @@ int kill_userspace_mm[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
- * @stub_stack: pointer to the stub stack. Can be NULL, if? FIXME:
+ * @stub_stack: pointer to the stub stack.
*
* Sets up a new temporary stack page that is used while userspace_tramp() runs
* Clones the kernel process into a new userspace process, with FDs only.
@@ -355,10 +313,10 @@ int start_userspace(unsigned long stub_stack)
goto out_kill;
}
- if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
+ if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *) PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
- printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
+ printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
@@ -380,8 +338,6 @@ int start_userspace(unsigned long stub_stack)
void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
{
int err, status, op, pid = userspace_pid[0];
- /* To prevent races if using_sysemu changes under us.*/
- int local_using_sysemu;
siginfo_t si;
/* Handle any immediate reschedules or signals */
@@ -411,11 +367,10 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
fatal_sigsegv();
}
- /* Now we set local_using_sysemu to be used for one loop */
- local_using_sysemu = get_using_sysemu();
-
- op = SELECT_PTRACE_OPERATION(local_using_sysemu,
- singlestepping(NULL));
+ if (singlestepping())
+ op = PTRACE_SYSEMU_SINGLESTEP;
+ else
+ op = PTRACE_SYSEMU;
if (ptrace(op, pid, 0, 0)) {
printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
@@ -474,7 +429,7 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
else handle_segv(pid, regs, aux_fp_regs);
break;
case SIGTRAP + 0x80:
- handle_trap(pid, regs, local_using_sysemu);
+ handle_trap(pid, regs);
break;
case SIGTRAP:
relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
@@ -597,10 +552,10 @@ int copy_context_skas0(unsigned long new_stack, int pid)
goto out_kill;
}
- if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
+ if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *)PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
- printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
+ printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index e3ee4db58b40..8b0e98ab842c 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -112,102 +112,32 @@ static int start_ptraced_child(void)
return pid;
}
-/* When testing for SYSEMU support, if it is one of the broken versions, we
- * must just avoid using sysemu, not panic, but only if SYSEMU features are
- * broken.
- * So only for SYSEMU features we test mustpanic, while normal host features
- * must work anyway!
- */
-static int stop_ptraced_child(int pid, int exitcode, int mustexit)
+static void stop_ptraced_child(int pid, int exitcode)
{
- int status, n, ret = 0;
+ int status, n;
+
+ if (ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+ fatal_perror("stop_ptraced_child : ptrace failed");
- if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) {
- perror("stop_ptraced_child : ptrace failed");
- return -1;
- }
CATCH_EINTR(n = waitpid(pid, &status, 0));
if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
int exit_with = WEXITSTATUS(status);
- if (exit_with == 2)
- non_fatal("check_ptrace : child exited with status 2. "
- "\nDisabling SYSEMU support.\n");
- non_fatal("check_ptrace : child exited with exitcode %d, while "
- "expecting %d; status 0x%x\n", exit_with,
- exitcode, status);
- if (mustexit)
- exit(1);
- ret = -1;
+ fatal("stop_ptraced_child : child exited with exitcode %d, "
+ "while expecting %d; status 0x%x\n", exit_with,
+ exitcode, status);
}
-
- return ret;
-}
-
-/* Changed only during early boot */
-static int force_sysemu_disabled = 0;
-
-static int __init nosysemu_cmd_param(char *str, int* add)
-{
- force_sysemu_disabled = 1;
- return 0;
}
-__uml_setup("nosysemu", nosysemu_cmd_param,
-"nosysemu\n"
-" Turns off syscall emulation patch for ptrace (SYSEMU).\n"
-" SYSEMU is a performance-patch introduced by Laurent Vivier. It changes\n"
-" behaviour of ptrace() and helps reduce host context switch rates.\n"
-" To make it work, you need a kernel patch for your host, too.\n"
-" See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n"
-" information.\n\n");
-
static void __init check_sysemu(void)
{
- unsigned long regs[MAX_REG_NR];
int pid, n, status, count=0;
- os_info("Checking syscall emulation patch for ptrace...");
- sysemu_supported = 0;
- pid = start_ptraced_child();
-
- if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
- goto fail;
-
- CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
- if (n < 0)
- fatal_perror("check_sysemu : wait failed");
- if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
- fatal("check_sysemu : expected SIGTRAP, got status = %d\n",
- status);
-
- if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
- fatal_perror("check_sysemu : PTRACE_GETREGS failed");
- if (PT_SYSCALL_NR(regs) != __NR_getpid) {
- non_fatal("check_sysemu got system call number %d, "
- "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid);
- goto fail;
- }
-
- n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
- if (n < 0) {
- non_fatal("check_sysemu : failed to modify system call "
- "return");
- goto fail;
- }
-
- if (stop_ptraced_child(pid, 0, 0) < 0)
- goto fail_stopped;
-
- sysemu_supported = 1;
- os_info("OK\n");
- set_using_sysemu(!force_sysemu_disabled);
-
- os_info("Checking advanced syscall emulation patch for ptrace...");
+ os_info("Checking syscall emulation for ptrace...");
pid = start_ptraced_child();
- if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+ if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
(void *) PTRACE_O_TRACESYSGOOD) < 0))
- fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed");
+ fatal_perror("check_sysemu: PTRACE_SETOPTIONS failed");
while (1) {
count++;
@@ -240,20 +170,15 @@ static void __init check_sysemu(void)
goto fail;
}
}
- if (stop_ptraced_child(pid, 0, 0) < 0)
- goto fail_stopped;
+ stop_ptraced_child(pid, 0);
- sysemu_supported = 2;
os_info("OK\n");
- if (!force_sysemu_disabled)
- set_using_sysemu(sysemu_supported);
return;
fail:
- stop_ptraced_child(pid, 1, 0);
-fail_stopped:
- non_fatal("missing\n");
+ stop_ptraced_child(pid, 1);
+ fatal("missing\n");
}
static void __init check_ptrace(void)
@@ -263,9 +188,9 @@ static void __init check_ptrace(void)
os_info("Checking that ptrace can change system call numbers...");
pid = start_ptraced_child();
- if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+ if ((ptrace(PTRACE_SETOPTIONS, pid, 0,
(void *) PTRACE_O_TRACESYSGOOD) < 0))
- fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed");
+ fatal_perror("check_ptrace: PTRACE_SETOPTIONS failed");
while (1) {
if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
@@ -291,7 +216,7 @@ static void __init check_ptrace(void)
break;
}
}
- stop_ptraced_child(pid, 0, 1);
+ stop_ptraced_child(pid, 0);
os_info("OK\n");
check_sysemu();
}
@@ -370,7 +295,7 @@ void __init os_early_checks(void)
pid = start_ptraced_child();
if (init_pid_registers(pid))
fatal("Failed to initialize default registers");
- stop_ptraced_child(pid, 1, 1);
+ stop_ptraced_child(pid, 1);
}
int __init parse_iomem(char *str, int *add)
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index fc0f2a9dee5a..1dca4ffbd572 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -173,23 +173,38 @@ __uml_setup("quiet", quiet_cmd_param,
"quiet\n"
" Turns off information messages during boot.\n\n");
+/*
+ * The os_info/os_warn functions will be called by helper threads. These
+ * have a very limited stack size and using the libc formatting functions
+ * may overflow the stack.
+ * So pull in the kernel vscnprintf and use that instead with a fixed
+ * on-stack buffer.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
void os_info(const char *fmt, ...)
{
+ char buf[256];
va_list list;
+ int len;
if (quiet_info)
return;
va_start(list, fmt);
- vfprintf(stderr, fmt, list);
+ len = vscnprintf(buf, sizeof(buf), fmt, list);
+ fwrite(buf, len, 1, stderr);
va_end(list);
}
void os_warn(const char *fmt, ...)
{
+ char buf[256];
va_list list;
+ int len;
va_start(list, fmt);
- vfprintf(stderr, fmt, list);
+ len = vscnprintf(buf, sizeof(buf), fmt, list);
+ fwrite(buf, len, 1, stderr);
va_end(list);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53f2e7797b1d..5edec175b9bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -59,6 +59,7 @@ config X86
#
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
+ select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_INIT
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
@@ -71,6 +72,7 @@ config X86
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_CPU_PASID if IOMMU_SVA
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
@@ -148,6 +150,7 @@ config X86
select GENERIC_CLOCKEVENTS_MIN_ADJUST
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_DEVICES
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_ENTRY
@@ -1967,6 +1970,11 @@ config INTEL_TDX_HOST
depends on CPU_SUP_INTEL
depends on X86_64
depends on KVM_INTEL
+ depends on X86_X2APIC
+ select ARCH_KEEP_MEMBLOCK
+ depends on CONTIG_ALLOC
+ depends on !KEXEC_CORE
+ depends on X86_MCE
help
Intel Trust Domain Extensions (TDX) protects guest VMs from malicious
host and certain physical attacks. This option enables necessary TDX
diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c
index 78e413269791..1655aa56a0a5 100644
--- a/arch/x86/coco/tdx/tdx-shared.c
+++ b/arch/x86/coco/tdx/tdx-shared.c
@@ -22,13 +22,13 @@ static unsigned long try_accept_one(phys_addr_t start, unsigned long len,
*/
switch (pg_level) {
case PG_LEVEL_4K:
- page_size = 0;
+ page_size = TDX_PS_4K;
break;
case PG_LEVEL_2M:
- page_size = 1;
+ page_size = TDX_PS_2M;
break;
case PG_LEVEL_1G:
- page_size = 2;
+ page_size = TDX_PS_1G;
break;
default:
return 0;
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index fecc4fe1d68a..f8f9a9b79395 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -23,10 +23,6 @@ static inline void prefill_possible_map(void) {}
#endif /* CONFIG_SMP */
-struct x86_cpu {
- struct cpu cpu;
-};
-
#ifdef CONFIG_HOTPLUG_CPU
extern void soft_restart_cpu(void);
#endif
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 632c26cdeeda..fdf723b6f6d0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -81,10 +81,8 @@
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-
-/* CPU types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
@@ -198,6 +196,7 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
@@ -499,6 +498,7 @@
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 197316121f04..b65e9c46b922 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -162,6 +162,8 @@
#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
+#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
+
/* Xeon Phi */
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
index 8fa6ac0e2d76..d91b37f5b4bb 100644
--- a/arch/x86/include/asm/kmsan.h
+++ b/arch/x86/include/asm/kmsan.h
@@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr)
{
unsigned long x = (unsigned long)addr;
unsigned long y = x - __START_KERNEL_map;
+ bool ret;
/* use the carry flag to determine if x was < __START_KERNEL_map */
if (unlikely(x > y)) {
@@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr)
return false;
}
- return pfn_valid(x >> PAGE_SHIFT);
+ /*
+ * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+ * the critical section. However, this would result in recursion with
+ * KMSAN. Therefore, disable preemption here, and re-enable preemption
+ * below while suppressing reschedules to avoid recursion.
+ *
+ * Note, this trade-off occasionally breaks scheduling guarantees.
+ * However, a kernel compiled with KMSAN has already given up on any
+ * performance guarantees due to being heavily instrumented.
+ */
+ preempt_disable();
+ ret = pfn_valid(x >> PAGE_SHIFT);
+ preempt_enable_no_resched();
+
+ return ret;
}
#endif /* !MODULE */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 26b628d84594..378ed944b849 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -55,8 +55,10 @@ KVM_X86_OP(set_rflags)
KVM_X86_OP(get_if_flag)
KVM_X86_OP(flush_tlb_all)
KVM_X86_OP(flush_tlb_current)
+#if IS_ENABLED(CONFIG_HYPERV)
KVM_X86_OP_OPTIONAL(flush_remote_tlbs)
KVM_X86_OP_OPTIONAL(flush_remote_tlbs_range)
+#endif
KVM_X86_OP(flush_tlb_gva)
KVM_X86_OP(flush_tlb_guest)
KVM_X86_OP(vcpu_pre_run)
@@ -135,6 +137,7 @@ KVM_X86_OP(msr_filter_changed)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
+KVM_X86_OP_OPTIONAL(get_untagged_addr)
#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index 6c98f4bb4228..058bc636356a 100644
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -22,7 +22,7 @@ KVM_X86_PMU_OP(get_msr)
KVM_X86_PMU_OP(set_msr)
KVM_X86_PMU_OP(refresh)
KVM_X86_PMU_OP(init)
-KVM_X86_PMU_OP(reset)
+KVM_X86_PMU_OP_OPTIONAL(reset)
KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
KVM_X86_PMU_OP_OPTIONAL(cleanup)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6711da000bb7..b5b2d0fde579 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -133,7 +133,8 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
+ | X86_CR4_LAM_SUP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -500,8 +501,23 @@ struct kvm_pmc {
u8 idx;
bool is_paused;
bool intr;
+ /*
+ * Base value of the PMC counter, relative to the *consumed* count in
+ * the associated perf_event. This value includes counter updates from
+ * the perf_event and emulated_count since the last time the counter
+ * was reprogrammed, but it is *not* the current value as seen by the
+ * guest or userspace.
+ *
+ * The count is relative to the associated perf_event so that KVM
+ * doesn't need to reprogram the perf_event every time the guest writes
+ * to the counter.
+ */
u64 counter;
- u64 prev_counter;
+ /*
+ * PMC events triggered by KVM emulation that haven't been fully
+ * processed, i.e. haven't undergone overflow detection.
+ */
+ u64 emulated_counter;
u64 eventsel;
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
@@ -937,8 +953,10 @@ struct kvm_vcpu_arch {
/* used for guest single stepping over the given code position */
unsigned long singlestep_rip;
+#ifdef CONFIG_KVM_HYPERV
bool hyperv_enabled;
struct kvm_vcpu_hv *hyperv;
+#endif
#ifdef CONFIG_KVM_XEN
struct kvm_vcpu_xen xen;
#endif
@@ -1095,6 +1113,7 @@ enum hv_tsc_page_status {
HV_TSC_PAGE_BROKEN,
};
+#ifdef CONFIG_KVM_HYPERV
/* Hyper-V emulation context */
struct kvm_hv {
struct mutex hv_lock;
@@ -1125,9 +1144,9 @@ struct kvm_hv {
*/
unsigned int synic_auto_eoi_used;
- struct hv_partition_assist_pg *hv_pa_pg;
struct kvm_hv_syndbg hv_syndbg;
};
+#endif
struct msr_bitmap_range {
u32 flags;
@@ -1136,6 +1155,7 @@ struct msr_bitmap_range {
unsigned long *bitmap;
};
+#ifdef CONFIG_KVM_XEN
/* Xen emulation context */
struct kvm_xen {
struct mutex xen_lock;
@@ -1147,6 +1167,7 @@ struct kvm_xen {
struct idr evtchn_ports;
unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
};
+#endif
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
@@ -1255,6 +1276,7 @@ enum kvm_apicv_inhibit {
};
struct kvm_arch {
+ unsigned long vm_type;
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
@@ -1347,8 +1369,13 @@ struct kvm_arch {
/* reads protected by irq_srcu, writes by irq_lock */
struct hlist_head mask_notifier_list;
+#ifdef CONFIG_KVM_HYPERV
struct kvm_hv hyperv;
+#endif
+
+#ifdef CONFIG_KVM_XEN
struct kvm_xen xen;
+#endif
bool backwards_tsc_observed;
bool boot_vcpu_runs_old_kvmclock;
@@ -1406,9 +1433,8 @@ struct kvm_arch {
* the MMU lock in read mode + RCU or
* the MMU lock in write mode
*
- * For writes, this list is protected by:
- * the MMU lock in read mode + the tdp_mmu_pages_lock or
- * the MMU lock in write mode
+ * For writes, this list is protected by tdp_mmu_pages_lock; see
+ * below for the details.
*
* Roots will remain in the list until their tdp_mmu_root_count
* drops to zero, at which point the thread that decremented the
@@ -1425,8 +1451,10 @@ struct kvm_arch {
* - possible_nx_huge_pages;
* - the possible_nx_huge_page_link field of kvm_mmu_page structs used
* by the TDP MMU
- * It is acceptable, but not necessary, to acquire this lock when
- * the thread holds the MMU lock in write mode.
+ * Because the lock is only taken within the MMU lock, strictly
+ * speaking it is redundant to acquire this lock when the thread
+ * holds the MMU lock in write mode. However it often simplifies
+ * the code to do so.
*/
spinlock_t tdp_mmu_pages_lock;
#endif /* CONFIG_X86_64 */
@@ -1441,6 +1469,7 @@ struct kvm_arch {
#if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
spinlock_t hv_root_tdp_lock;
+ struct hv_partition_assist_pg *hv_pa_pg;
#endif
/*
* VM-scope maximum vCPU ID. Used to determine the size of structures
@@ -1613,9 +1642,11 @@ struct kvm_x86_ops {
void (*flush_tlb_all)(struct kvm_vcpu *vcpu);
void (*flush_tlb_current)(struct kvm_vcpu *vcpu);
+#if IS_ENABLED(CONFIG_HYPERV)
int (*flush_remote_tlbs)(struct kvm *kvm);
int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn,
gfn_t nr_pages);
+#endif
/*
* Flush any TLB entries associated with the given GVA.
@@ -1761,6 +1792,8 @@ struct kvm_x86_ops {
* Returns vCPU specific APICv inhibit reasons
*/
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
+
+ gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
};
struct kvm_x86_nested_ops {
@@ -1824,6 +1857,7 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
#define __KVM_HAVE_ARCH_VM_FREE
void kvm_arch_free_vm(struct kvm *kvm);
+#if IS_ENABLED(CONFIG_HYPERV)
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
@@ -1835,6 +1869,15 @@ static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
}
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
+ u64 nr_pages)
+{
+ if (!kvm_x86_ops.flush_remote_tlbs_range)
+ return -EOPNOTSUPP;
+
+ return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
+}
+#endif /* CONFIG_HYPERV */
#define kvm_arch_pmi_in_guest(vcpu) \
((vcpu) && (vcpu)->arch.handling_intr_from_guest)
@@ -1848,6 +1891,9 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
+void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -2086,6 +2132,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
int tdp_max_root_level, int tdp_huge_page_level);
+#ifdef CONFIG_KVM_PRIVATE_MEM
+#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
+#else
+#define kvm_arch_has_private_mem(kvm) false
+#endif
+
static inline u16 kvm_read_ldt(void)
{
u16 ldt;
@@ -2133,16 +2185,15 @@ enum {
#define HF_SMM_MASK (1 << 1)
#define HF_SMM_INSIDE_NMI_MASK (1 << 2)
-# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-# define KVM_ADDRESS_SPACE_NUM 2
+# define KVM_MAX_NR_ADDRESS_SPACES 2
+/* SMM is currently unsupported for guests with private memory. */
+# define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2)
# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
#else
# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
#endif
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_cpu_has_extint(struct kvm_vcpu *v);
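The kvm_pmc change above splits the guest-visible count into a base counter and an emulated_counter holding emulated events that have not yet gone through overflow detection. As a rough sketch only (the helper name is invented and the final width masking is omitted; the struct fields and perf_event_read_value() are the real pieces), a read of the guest-visible value combines both fields with whatever the backing perf_event has accumulated since the counter was last reprogrammed:

	static u64 example_pmc_read(struct kvm_pmc *pmc)
	{
		u64 counter, enabled, running;

		/* Base value plus emulated events not yet folded in. */
		counter = pmc->counter + pmc->emulated_counter;

		/* Add the perf_event's count since the last reprogram. */
		if (pmc->perf_event && !pmc->is_paused)
			counter += perf_event_read_value(pmc->perf_event,
							 &enabled, &running);

		/* Real code also masks to the counter's architectural width. */
		return counter;
	}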
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 737a52b89e64..f1bd7b91b3c6 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -541,6 +541,9 @@
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309
+/* KeyID partitioning between MKTME and TDX */
+#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
+
/*
* AMD64 MSRs. Not complete. See the architecture manual for a more
* complete list.
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index ccce7ebd8677..fdfd41511b02 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -55,6 +55,12 @@
(TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \
TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15)
+/* TDX supported page sizes from the TDX module ABI. */
+#define TDX_PS_4K 0
+#define TDX_PS_2M 1
+#define TDX_PS_1G 2
+#define TDX_PS_NR (TDX_PS_1G + 1)
+
#ifndef __ASSEMBLY__
#include <linux/compiler_attributes.h>
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 21f9407be5d3..7e88705e907f 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -58,12 +58,29 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
,,regs->di,,regs->si,,regs->dx \
,,regs->r10,,regs->r8,,regs->r9) \
+
+/* SYSCALL_PT_ARGS is adapted from s390x */
+#define SYSCALL_PT_ARG6(m, t1, t2, t3, t4, t5, t6) \
+ SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5), m(t6, (regs->bp))
+#define SYSCALL_PT_ARG5(m, t1, t2, t3, t4, t5) \
+ SYSCALL_PT_ARG4(m, t1, t2, t3, t4), m(t5, (regs->di))
+#define SYSCALL_PT_ARG4(m, t1, t2, t3, t4) \
+ SYSCALL_PT_ARG3(m, t1, t2, t3), m(t4, (regs->si))
+#define SYSCALL_PT_ARG3(m, t1, t2, t3) \
+ SYSCALL_PT_ARG2(m, t1, t2), m(t3, (regs->dx))
+#define SYSCALL_PT_ARG2(m, t1, t2) \
+ SYSCALL_PT_ARG1(m, t1), m(t2, (regs->cx))
+#define SYSCALL_PT_ARG1(m, t1) m(t1, (regs->bx))
+#define SYSCALL_PT_ARGS(x, ...) SYSCALL_PT_ARG##x(__VA_ARGS__)
+
+#define __SC_COMPAT_CAST(t, a) \
+ (__typeof(__builtin_choose_expr(__TYPE_IS_L(t), 0, 0U))) \
+ (unsigned int)a
+
/* Mapping of registers to parameters for syscalls on i386 */
#define SC_IA32_REGS_TO_ARGS(x, ...) \
- __MAP(x,__SC_ARGS \
- ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \
- ,,(unsigned int)regs->dx,,(unsigned int)regs->si \
- ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+ SYSCALL_PT_ARGS(x, __SC_COMPAT_CAST, \
+ __MAP(x, __SC_TYPE, __VA_ARGS__)) \
#define __SYS_STUB0(abi, name) \
long __##abi##_##name(const struct pt_regs *regs); \
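To illustrate the macros above with a hypothetical three-argument compat syscall (the argument names and types are made up), SC_IA32_REGS_TO_ARGS(3, int, fd, unsigned int, cmd, unsigned long, arg) now expands roughly to:

	__SC_COMPAT_CAST(int,           (regs->bx)),
	__SC_COMPAT_CAST(unsigned int,  (regs->cx)),
	__SC_COMPAT_CAST(unsigned long, (regs->dx))

so each register value is truncated to 32 bits and then, via __TYPE_IS_L(), sign-extended for long-based parameter types and zero-extended otherwise, which the previous blanket (unsigned int) casts could not express.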
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index f3d5305a60fc..eba178996d84 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -24,8 +24,16 @@
#define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP)
#define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD)
+/*
+ * TDX module SEAMCALL leaf function error codes
+ */
+#define TDX_SUCCESS 0ULL
+#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
+
#ifndef __ASSEMBLY__
+#include <uapi/asm/mce.h>
+
/*
* Used by the #VE exception handler to gather the #VE exception
* info from the TDX module. This is a software only structure
@@ -83,6 +91,36 @@ static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1,
u64 __seamcall(u64 fn, struct tdx_module_args *args);
u64 __seamcall_ret(u64 fn, struct tdx_module_args *args);
u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args);
+void tdx_init(void);
+
+#include <asm/archrandom.h>
+
+typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args);
+
+static inline u64 sc_retry(sc_func_t func, u64 fn,
+ struct tdx_module_args *args)
+{
+ int retry = RDRAND_RETRY_LOOPS;
+ u64 ret;
+
+ do {
+ ret = func(fn, args);
+ } while (ret == TDX_RND_NO_ENTROPY && --retry);
+
+ return ret;
+}
+
+#define seamcall(_fn, _args) sc_retry(__seamcall, (_fn), (_args))
+#define seamcall_ret(_fn, _args) sc_retry(__seamcall_ret, (_fn), (_args))
+#define seamcall_saved_ret(_fn, _args) sc_retry(__seamcall_saved_ret, (_fn), (_args))
+int tdx_cpu_enable(void);
+int tdx_enable(void);
+const char *tdx_dump_mce_info(struct mce *m);
+#else
+static inline void tdx_init(void) { }
+static inline int tdx_cpu_enable(void) { return -ENODEV; }
+static inline int tdx_enable(void) { return -ENODEV; }
+static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
#endif /* CONFIG_INTEL_TDX_HOST */
#endif /* !__ASSEMBLY__ */
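A minimal usage sketch of the retry wrapper introduced above; the function name and error handling are illustrative only, while seamcall(), struct tdx_module_args, TDX_SUCCESS and the entropy retry come from the patch:

	static int example_issue_seamcall(u64 leaf)
	{
		struct tdx_module_args args = {};
		u64 err;

		/*
		 * sc_retry() re-issues the SEAMCALL while the TDX module
		 * returns TDX_RND_NO_ENTROPY, up to RDRAND_RETRY_LOOPS times.
		 */
		err = seamcall(leaf, &args);
		if (err == TDX_SUCCESS)
			return 0;

		pr_err("SEAMCALL %llu failed: 0x%llx\n", leaf, err);
		return -EIO;
	}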
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 1a6a1f987949..a448d0964fc0 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -562,4 +562,7 @@ struct kvm_pmu_event_filter {
/* x86-specific KVM_EXIT_HYPERCALL flags. */
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+#define KVM_X86_DEFAULT_VM 0
+#define KVM_X86_SW_PROTECTED_VM 1
+
#endif /* _ASM_X86_KVM_H */
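The new VM type constants are passed through the machine-type argument of the existing KVM_CREATE_VM ioctl. A hedged userspace fragment (not part of the patch; error handling omitted):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int create_sw_protected_vm(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);

		/*
		 * KVM_X86_DEFAULT_VM (0) keeps today's behaviour; the
		 * software-protected type needs CONFIG_KVM_SW_PROTECTED_VM.
		 */
		return ioctl(kvm, KVM_CREATE_VM, KVM_X86_SW_PROTECTED_VM);
	}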
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index cc130b57542a..1d85cb7071cb 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -403,7 +403,7 @@ noinstr void BUG_func(void)
{
BUG();
}
-EXPORT_SYMBOL_GPL(BUG_func);
+EXPORT_SYMBOL(BUG_func);
#define CALL_RIP_REL_OPCODE 0xff
#define CALL_RIP_REL_MODRM 0x15
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 4feaa670d578..89c0c8a3fc7e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -259,10 +259,9 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
order);
}
- /* No multi-function device? */
type = read_pci_config_byte(bus, slot, func,
PCI_HEADER_TYPE);
- if (!(type & 0x80))
+ if (!(type & PCI_HEADER_TYPE_MFD))
break;
}
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9f42d1c59e09..f3abca334199 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -538,7 +538,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
/* Figure out Zen generations: */
switch (c->x86) {
- case 0x17: {
+ case 0x17:
switch (c->x86_model) {
case 0x00 ... 0x2f:
case 0x50 ... 0x5f:
@@ -554,8 +554,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
goto warn;
}
break;
- }
- case 0x19: {
+
+ case 0x19:
switch (c->x86_model) {
case 0x00 ... 0x0f:
case 0x20 ... 0x5f:
@@ -569,7 +569,20 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
goto warn;
}
break;
- }
+
+ case 0x1a:
+ switch (c->x86_model) {
+ case 0x00 ... 0x0f:
+ case 0x20 ... 0x2f:
+ case 0x40 ... 0x4f:
+ case 0x70 ... 0x7f:
+ setup_force_cpu_cap(X86_FEATURE_ZEN5);
+ break;
+ default:
+ goto warn;
+ }
+ break;
+
default:
break;
}
@@ -1039,6 +1052,11 @@ static void init_amd_zen4(struct cpuinfo_x86 *c)
msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
}
+static void init_amd_zen5(struct cpuinfo_x86 *c)
+{
+ init_amd_zen_common();
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
u64 vm_cr;
@@ -1084,6 +1102,8 @@ static void init_amd(struct cpuinfo_x86 *c)
init_amd_zen3(c);
else if (boot_cpu_has(X86_FEATURE_ZEN4))
init_amd_zen4(c);
+ else if (boot_cpu_has(X86_FEATURE_ZEN5))
+ init_amd_zen5(c);
/*
* Enable workaround for FXSAVE leak on CPUs
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 94bff381ef20..0b97bcde70c6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -66,6 +66,7 @@
#include <asm/set_memory.h>
#include <asm/traps.h>
#include <asm/sev.h>
+#include <asm/tdx.h>
#include "cpu.h"
@@ -1986,6 +1987,7 @@ static __init void identify_boot_cpu(void)
setup_cr_pinning();
tsx_init();
+ tdx_init();
lkgs_init();
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index fd5ce12c4f9a..bc39252bc54f 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -53,6 +53,7 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/reboot.h>
+#include <asm/tdx.h>
#include "internal.h"
@@ -229,12 +230,20 @@ static void wait_for_panic(void)
panic("Panicing machine check CPU died");
}
+static const char *mce_dump_aux_info(struct mce *m)
+{
+ if (boot_cpu_has_bug(X86_BUG_TDX_PW_MCE))
+ return tdx_dump_mce_info(m);
+
+ return NULL;
+}
+
static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
{
struct llist_node *pending;
struct mce_evt_llist *l;
int apei_err = 0;
- struct page *p;
+ const char *memmsg;
/*
* Allow instrumentation around external facilities usage. Not that it
@@ -285,6 +294,11 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
}
if (exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
+
+ memmsg = mce_dump_aux_info(final);
+ if (memmsg)
+ pr_emerg(HW_ERR "Machine check: %s\n", memmsg);
+
if (!fake_panic) {
if (panic_timeout == 0)
panic_timeout = mca_cfg.panic_timeout;
@@ -297,6 +311,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
*/
if (kexec_crash_loaded()) {
if (final && (final->status & MCI_STATUS_ADDRV)) {
+ struct page *p;
p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
if (p)
SetPageHWPoison(p);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a6c1867fc7aa..59f4aefc6bc1 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -779,13 +779,13 @@ static int __init check_dev_quirk(int num, int slot, int func)
type = read_pci_config_byte(num, slot, func,
PCI_HEADER_TYPE);
- if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) {
+ if ((type & PCI_HEADER_TYPE_MASK) == PCI_HEADER_TYPE_BRIDGE) {
sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS);
if (sec > num)
early_pci_scan_bus(sec);
}
- if (!(type & 0x80))
+ if (!(type & PCI_HEADER_TYPE_MFD))
return -1;
return 0;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8ff2bf921519..a38d0c93a66e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1438,7 +1438,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) {
- if (unlikely(mc146818_get_time(&curr_time) < 0)) {
+ if (unlikely(mc146818_get_time(&curr_time, 10) < 0)) {
pr_err_ratelimited("unable to read current time from RTC\n");
return IRQ_HANDLED;
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index a95d0900e8c6..5bb395551c44 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -24,8 +24,8 @@
static int kvmclock __initdata = 1;
static int kvmclock_vsyscall __initdata = 1;
-static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME;
-static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK;
+static int msr_kvm_system_time __ro_after_init;
+static int msr_kvm_wall_clock __ro_after_init;
static u64 kvm_sched_clock_offset __ro_after_init;
static int __init parse_no_kvmclock(char *arg)
@@ -195,7 +195,8 @@ static void kvm_setup_secondary_clock(void)
void kvmclock_disable(void)
{
- native_write_msr(msr_kvm_system_time, 0, 0);
+ if (msr_kvm_system_time)
+ native_write_msr(msr_kvm_system_time, 0, 0);
}
static void __init kvmclock_init_mem(void)
@@ -294,7 +295,10 @@ void __init kvmclock_init(void)
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
- } else if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ } else if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
+ msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+ } else {
return;
}
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 1309b9b05338..2e7066980f3e 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -67,7 +67,7 @@ void mach_get_cmos_time(struct timespec64 *now)
return;
}
- if (mc146818_get_time(&tm)) {
+ if (mc146818_get_time(&tm, 1000)) {
pr_err("Unable to read current time from RTC\n");
now->tv_sec = now->tv_nsec = 0;
return;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ec2c21a1844e..84201071dfac 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1031,6 +1031,8 @@ void __init setup_arch(char **cmdline_p)
*
* Moreover, on machines with SandyBridge graphics or in setups that use
* crashkernel the entire 1M is reserved anyway.
+ *
+ * Note that TDX host support also requires the first 1MB to be reserved.
*/
x86_platform.realmode_reserve();
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 0bab03130033..d42c28b8bfd8 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -35,38 +35,9 @@
#include <asm/io_apic.h>
#include <asm/cpu.h>
-static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
-
#ifdef CONFIG_HOTPLUG_CPU
-int arch_register_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
{
- struct x86_cpu *xc = per_cpu_ptr(&cpu_devices, cpu);
-
- xc->cpu.hotpluggable = cpu > 0;
- return register_cpu(&xc->cpu, cpu);
-}
-EXPORT_SYMBOL(arch_register_cpu);
-
-void arch_unregister_cpu(int num)
-{
- unregister_cpu(&per_cpu(cpu_devices, num).cpu);
-}
-EXPORT_SYMBOL(arch_unregister_cpu);
-#else /* CONFIG_HOTPLUG_CPU */
-
-int __init arch_register_cpu(int num)
-{
- return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
+ return cpu > 0;
}
#endif /* CONFIG_HOTPLUG_CPU */
-
-static int __init topology_init(void)
-{
- int i;
-
- for_each_present_cpu(i)
- arch_register_cpu(i);
-
- return 0;
-}
-subsys_initcall(topology_init);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b0737a15c470..c3b2f863acf0 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -566,7 +566,7 @@ static bool fixup_iopl_exception(struct pt_regs *regs)
*/
static bool try_fixup_enqcmd_gp(void)
{
-#ifdef CONFIG_IOMMU_SVA
+#ifdef CONFIG_ARCH_HAS_CPU_PASID
u32 pasid;
/*
@@ -592,7 +592,7 @@ static bool try_fixup_enqcmd_gp(void)
if (!mm_valid_pasid(current->mm))
return false;
- pasid = current->mm->pasid;
+ pasid = mm_get_enqcmd_pasid(current->mm);
/*
* Did this thread already have its PASID activated?
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 950c12868d30..87e3da7b0439 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -23,17 +23,15 @@ config KVM
depends on HAVE_KVM
depends on HIGH_RES_TIMERS
depends on X86_LOCAL_APIC
- select PREEMPT_NOTIFIERS
- select MMU_NOTIFIER
+ select KVM_COMMON
+ select KVM_GENERIC_MMU_NOTIFIER
select HAVE_KVM_IRQCHIP
select HAVE_KVM_PFNCACHE
- select HAVE_KVM_IRQFD
select HAVE_KVM_DIRTY_RING_TSO
select HAVE_KVM_DIRTY_RING_ACQ_REL
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
- select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
@@ -46,7 +44,6 @@ config KVM
select KVM_XFER_TO_GUEST_WORK
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
- select INTERVAL_TREE
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
help
@@ -65,18 +62,30 @@ config KVM
config KVM_WERROR
bool "Compile KVM with -Werror"
- # KASAN may cause the build to fail due to larger frames
- default y if X86_64 && !KASAN
- # We use the dependency on !COMPILE_TEST to not be enabled
- # blindly in allmodconfig or allyesconfig configurations
- depends on KVM
- depends on (X86_64 && !KASAN) || !COMPILE_TEST
- depends on EXPERT
+ # Disallow KVM's -Werror if KASAN is enabled, e.g. to guard against
+ # randomized configs selecting KVM_WERROR=y, which doesn't play
+ # nice with KASAN. KASAN builds generate warnings for the default
+ # FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
+ # Building KVM with -Werror and KASAN is still doable via enabling
+ # the kernel-wide WERROR=y.
+ depends on KVM && EXPERT && !KASAN
help
Add -Werror to the build flags for KVM.
If in doubt, say "N".
+config KVM_SW_PROTECTED_VM
+ bool "Enable support for KVM software-protected VMs"
+ depends on EXPERT
+ depends on KVM && X86_64
+ select KVM_GENERIC_PRIVATE_MEM
+ help
+ Enable support for KVM software-protected VMs. Currently "protected"
+ means the VM can be backed with memory provided by
+ KVM_CREATE_GUEST_MEMFD.
+
+ If unsure, say "N".
+
config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL
@@ -129,6 +138,20 @@ config KVM_SMM
If unsure, say Y.
+config KVM_HYPERV
+ bool "Support for Microsoft Hyper-V emulation"
+ depends on KVM
+ default y
+ help
+ Provides KVM support for emulating Microsoft Hyper-V. This allows KVM
+ to expose a subset of the paravirtualized interfaces defined in the
+ Hyper-V Hypervisor Top-Level Functional Specification (TLFS):
+ https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
+ These interfaces are required for the correct and performant functioning
+ of Windows and Hyper-V guests on KVM.
+
+ If unsure, say "Y".
+
config KVM_XEN
bool "Support for Xen hypercall interface"
depends on KVM
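Assuming the dependencies above are satisfied (64-bit build, EXPERT visible), a configuration fragment exercising the new options might look like the following; KVM_HYPERV defaults to y, so it only needs stating when overriding:

	CONFIG_KVM=y
	CONFIG_KVM_HYPERV=y
	CONFIG_KVM_SW_PROTECTED_VM=y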
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 80e3fe184d17..475b5fa917a6 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -11,25 +11,27 @@ include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
- hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
+ debugfs.o mmu/mmu.o mmu/page_track.o \
mmu/spte.o
-ifdef CONFIG_HYPERV
-kvm-y += kvm_onhyperv.o
-endif
-
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
+kvm-$(CONFIG_KVM_HYPERV) += hyperv.o
kvm-$(CONFIG_KVM_XEN) += xen.o
kvm-$(CONFIG_KVM_SMM) += smm.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
- vmx/hyperv.o vmx/nested.o vmx/posted_intr.o
+ vmx/nested.o vmx/posted_intr.o
+
kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o
+kvm-intel-$(CONFIG_KVM_HYPERV) += vmx/hyperv.o vmx/hyperv_evmcs.o
kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \
- svm/sev.o svm/hyperv.o
+ svm/sev.o
+kvm-amd-$(CONFIG_KVM_HYPERV) += svm/hyperv.o
ifdef CONFIG_HYPERV
+kvm-y += kvm_onhyperv.o
+kvm-intel-y += vmx/vmx_onhyperv.o vmx/hyperv_evmcs.o
kvm-amd-y += svm/svm_onhyperv.o
endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 42d3f47f4c07..adba49afb5fe 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -314,11 +314,15 @@ EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
{
+#ifdef CONFIG_KVM_HYPERV
struct kvm_cpuid_entry2 *entry;
entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE,
KVM_CPUID_INDEX_NOT_SIGNIFICANT);
return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX;
+#else
+ return false;
+#endif
}
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
@@ -433,11 +437,13 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
return 0;
}
+#ifdef CONFIG_KVM_HYPERV
if (kvm_cpuid_has_hyperv(e2, nent)) {
r = kvm_hv_vcpu_init(vcpu);
if (r)
return r;
}
+#endif
r = kvm_check_cpuid(vcpu, e2, nent);
if (r)
@@ -469,7 +475,7 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
return -E2BIG;
if (cpuid->nent) {
- e = vmemdup_user(entries, array_size(sizeof(*e), cpuid->nent));
+ e = vmemdup_array_user(entries, cpuid->nent, sizeof(*e));
if (IS_ERR(e))
return PTR_ERR(e);
@@ -513,7 +519,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
return -E2BIG;
if (cpuid->nent) {
- e2 = vmemdup_user(entries, array_size(sizeof(*e2), cpuid->nent));
+ e2 = vmemdup_array_user(entries, cpuid->nent, sizeof(*e2));
if (IS_ERR(e2))
return PTR_ERR(e2);
}
@@ -671,7 +677,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_7_1_EAX,
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
F(FZRM) | F(FSRS) | F(FSRC) |
- F(AMX_FP16) | F(AVX_IFMA)
+ F(AMX_FP16) | F(AVX_IFMA) | F(LAM)
);
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
@@ -679,6 +685,11 @@ void kvm_set_cpu_caps(void)
F(AMX_COMPLEX)
);
+ kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
+ F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) |
+ F(BHI_CTRL) | F(MCDT_NO)
+ );
+
kvm_cpu_cap_mask(CPUID_D_1_EAX,
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
);
@@ -960,13 +971,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
break;
/* function 7 has additional index. */
case 7:
- entry->eax = min(entry->eax, 1u);
+ max_idx = entry->eax = min(entry->eax, 2u);
cpuid_entry_override(entry, CPUID_7_0_EBX);
cpuid_entry_override(entry, CPUID_7_ECX);
cpuid_entry_override(entry, CPUID_7_EDX);
- /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */
- if (entry->eax == 1) {
+ /* KVM only supports up to 0x7.2, capped above via min(). */
+ if (max_idx >= 1) {
entry = do_host_cpuid(array, function, 1);
if (!entry)
goto out;
@@ -976,6 +987,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ebx = 0;
entry->ecx = 0;
}
+ if (max_idx >= 2) {
+ entry = do_host_cpuid(array, function, 2);
+ if (!entry)
+ goto out;
+
+ cpuid_entry_override(entry, CPUID_7_2_EDX);
+ entry->ecx = 0;
+ entry->ebx = 0;
+ entry->eax = 0;
+ }
break;
case 0xa: { /* Architectural Performance Monitoring */
union cpuid10_eax eax;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 0b90532b6e26..856e3037e74f 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -47,11 +47,6 @@ static inline bool kvm_vcpu_is_legal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
return !(gpa & vcpu->arch.reserved_gpa_bits);
}
-static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
- return !kvm_vcpu_is_legal_gpa(vcpu, gpa);
-}
-
static inline bool kvm_vcpu_is_legal_aligned_gpa(struct kvm_vcpu *vcpu,
gpa_t gpa, gpa_t alignment)
{
@@ -279,4 +274,12 @@ static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
vcpu->arch.governed_features.enabled);
}
+static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+{
+ if (guest_can_use(vcpu, X86_FEATURE_LAM))
+ cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
+
+ return kvm_vcpu_is_legal_gpa(vcpu, cr3);
+}
+
#endif
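A concrete consequence of kvm_vcpu_is_legal_cr3() above: when LAM is enabled for the guest, a CR3 write of (root_pgd | X86_CR3_LAM_U57) is accepted exactly when root_pgd by itself is a legal GPA, because the two LAM control bits are stripped before the reserved-bit check; without LAM exposed, the same value is rejected for setting reserved bits.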
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index eea6ea7f14af..95ea1a1f7403 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -111,7 +111,7 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
mutex_lock(&kvm->slots_lock);
write_lock(&kvm->mmu_lock);
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
int bkt;
slots = __kvm_memslots(kvm, i);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2673cd5c46cb..e223043ef5b2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -687,8 +687,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
unsigned *max_size, unsigned size,
- bool write, bool fetch,
- enum x86emul_mode mode, ulong *linear)
+ enum x86emul_mode mode, ulong *linear,
+ unsigned int flags)
{
struct desc_struct desc;
bool usable;
@@ -701,7 +701,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
- *linear = la;
+ *linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
va_bits = ctxt_virt_addr_bits(ctxt);
if (!__is_canonical_address(la, va_bits))
goto bad;
@@ -717,11 +717,11 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
if (!usable)
goto bad;
/* code segment in protected mode or read-only data segment */
- if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
- || !(desc.type & 2)) && write)
+ if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
+ (flags & X86EMUL_F_WRITE))
goto bad;
/* unreadable code segment */
- if (!fetch && (desc.type & 8) && !(desc.type & 2))
+ if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
goto bad;
lim = desc_limit_scaled(&desc);
if (!(desc.type & 8) && (desc.type & 4)) {
@@ -757,8 +757,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ulong *linear)
{
unsigned max_size;
- return __linearize(ctxt, addr, &max_size, size, write, false,
- ctxt->mode, linear);
+ return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
+ write ? X86EMUL_F_WRITE : 0);
}
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
@@ -771,7 +771,8 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
- rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
+ rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
+ X86EMUL_F_FETCH);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
@@ -907,8 +908,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
* boundary check itself. Instead, we use max_size to check
* against op_size.
*/
- rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
- &linear);
+ rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
+ X86EMUL_F_FETCH);
if (unlikely(rc != X86EMUL_CONTINUE))
return rc;
@@ -3439,8 +3440,10 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
int rc;
ulong linear;
+ unsigned int max_size;
- rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
+ rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
+ &linear, X86EMUL_F_INVLPG);
if (rc == X86EMUL_CONTINUE)
ctxt->ops->invlpg(ctxt, linear);
/* Disable writeback. */
diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h
index 423a73395c10..ad463b1ed4e4 100644
--- a/arch/x86/kvm/governed_features.h
+++ b/arch/x86/kvm/governed_features.h
@@ -16,6 +16,7 @@ KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
KVM_GOVERNED_X86_FEATURE(VGIF)
KVM_GOVERNED_X86_FEATURE(VNMI)
+KVM_GOVERNED_X86_FEATURE(LAM)
#undef KVM_GOVERNED_X86_FEATURE
#undef KVM_GOVERNED_FEATURE
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index f83b8db72b11..1dc0b6604526 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -24,6 +24,8 @@
#include <linux/kvm_host.h>
#include "x86.h"
+#ifdef CONFIG_KVM_HYPERV
+
/* "Hv#1" signature */
#define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
@@ -105,6 +107,17 @@ int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
+static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector)
+{
+ return to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->vec_bitmap);
+}
+
+static inline bool kvm_hv_synic_auto_eoi_set(struct kvm_vcpu *vcpu, int vector)
+{
+ return to_hv_vcpu(vcpu) &&
+ test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap);
+}
+
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
@@ -236,6 +249,76 @@ static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
return kvm_hv_get_assist_page(vcpu);
}
+static inline void kvm_hv_nested_transtion_tlb_flush(struct kvm_vcpu *vcpu,
+ bool tdp_enabled)
+{
+ /*
+ * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
+ * L2's VP_ID upon request from the guest. Make sure we check for
+ * pending entries in the right FIFO upon L1/L2 transition as these
+ * requests are put by other vCPUs asynchronously.
+ */
+ if (to_hv_vcpu(vcpu) && tdp_enabled)
+ kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+}
+
int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
+#else /* CONFIG_KVM_HYPERV */
+static inline void kvm_hv_setup_tsc_page(struct kvm *kvm,
+ struct pvclock_vcpu_time_info *hv_clock) {}
+static inline void kvm_hv_request_tsc_page_update(struct kvm *kvm) {}
+static inline void kvm_hv_init_vm(struct kvm *kvm) {}
+static inline void kvm_hv_destroy_vm(struct kvm *kvm) {}
+static inline int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+static inline void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_hv_hypercall_enabled(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+ return HV_STATUS_ACCESS_DENIED;
+}
+static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {}
+static inline void kvm_hv_free_pa_page(struct kvm *kvm) {}
+static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector)
+{
+ return false;
+}
+static inline bool kvm_hv_synic_auto_eoi_set(struct kvm_vcpu *vcpu, int vector)
+{
+ return false;
+}
+static inline void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) {}
+static inline bool kvm_hv_invtsc_suppressed(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+static inline void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled) {}
+static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
+{
+ return vcpu->vcpu_idx;
+}
+static inline void kvm_hv_nested_transtion_tlb_flush(struct kvm_vcpu *vcpu, bool tdp_enabled) {}
+#endif /* CONFIG_KVM_HYPERV */
-#endif
+#endif /* __ARCH_X86_KVM_HYPERV_H__ */
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index b2c397dd2bc6..ad9ca8a60144 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -118,8 +118,10 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
if (!lapic_in_kernel(v))
return v->arch.interrupt.nr;
+#ifdef CONFIG_KVM_XEN
if (kvm_xen_has_interrupt(v))
return v->kvm->arch.xen.upcall_vector;
+#endif
if (irqchip_split(v->kvm)) {
int vector = v->arch.pending_external_vector;
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 16d076a1b91a..68f3f6c26046 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -144,7 +144,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
}
-
+#ifdef CONFIG_KVM_HYPERV
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
@@ -154,6 +154,7 @@ static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}
+#endif
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
@@ -163,9 +164,11 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
int r;
switch (e->type) {
+#ifdef CONFIG_KVM_HYPERV
case KVM_IRQ_ROUTING_HV_SINT:
return kvm_hv_set_sint(e, kvm, irq_source_id, level,
line_status);
+#endif
case KVM_IRQ_ROUTING_MSI:
if (kvm_msi_route_invalid(kvm, e))
@@ -314,11 +317,13 @@ int kvm_set_routing_entry(struct kvm *kvm,
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
break;
+#ifdef CONFIG_KVM_HYPERV
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
e->hv_sint.sint = ue->u.hv_sint.sint;
break;
+#endif
#ifdef CONFIG_KVM_XEN
case KVM_IRQ_ROUTING_XEN_EVTCHN:
return kvm_xen_setup_evtchn(kvm, e, ue);
@@ -438,5 +443,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
void kvm_arch_irq_routing_update(struct kvm *kvm)
{
+#ifdef CONFIG_KVM_HYPERV
kvm_hv_irq_routing_update(kvm);
+#endif
}
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index be7aeb9b8ea3..e6d149825169 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -88,6 +88,12 @@ struct x86_instruction_info {
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
+/* x86-specific emulation flags */
+#define X86EMUL_F_WRITE BIT(0)
+#define X86EMUL_F_FETCH BIT(1)
+#define X86EMUL_F_IMPLICIT BIT(2)
+#define X86EMUL_F_INVLPG BIT(3)
+
struct x86_emulate_ops {
void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
/*
@@ -224,6 +230,9 @@ struct x86_emulate_ops {
int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
+
+ gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
+ unsigned int flags);
};
/* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h
index f9ca3e7432b2..eefab3dc8498 100644
--- a/arch/x86/kvm/kvm_onhyperv.h
+++ b/arch/x86/kvm/kvm_onhyperv.h
@@ -10,6 +10,26 @@
int hv_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, gfn_t nr_pages);
int hv_flush_remote_tlbs(struct kvm *kvm);
void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp);
+static inline hpa_t hv_get_partition_assist_page(struct kvm_vcpu *vcpu)
+{
+ /*
+ * The partition assist page is something Hyper-V running in L0
+ * requires from KVM running in L1 before direct TLB flush for L2
+ * guests can be enabled. KVM doesn't currently use the page, but to
+ * comply with the TLFS it still needs to be allocated. For now, this
+ * is a single page shared among all vCPUs.
+ */
+ struct hv_partition_assist_pg **p_hv_pa_pg =
+ &vcpu->kvm->arch.hv_pa_pg;
+
+ if (!*p_hv_pa_pg)
+ *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+
+ if (!*p_hv_pa_pg)
+ return INVALID_PAGE;
+
+ return __pa(*p_hv_pa_pg);
+}
#else /* !CONFIG_HYPERV */
static inline int hv_flush_remote_tlbs(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 245b20973cae..3242f3da2457 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1475,8 +1475,7 @@ static int apic_set_eoi(struct kvm_lapic *apic)
apic_clear_isr(vector, apic);
apic_update_ppr(apic);
- if (to_hv_vcpu(apic->vcpu) &&
- test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
+ if (kvm_hv_synic_has_vector(apic->vcpu, vector))
kvm_hv_synic_send_eoi(apic->vcpu, vector);
kvm_ioapic_send_eoi(apic, vector);
@@ -2905,7 +2904,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
*/
apic_clear_irr(vector, apic);
- if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
+ if (kvm_hv_synic_auto_eoi_set(vcpu, vector)) {
/*
* For auto-EOI interrupts, there might be another pending
* interrupt above PPR, so check whether to raise another
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index bb8c86eefac0..60f21bb4c27b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -146,6 +146,14 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
}
+static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
+{
+ if (!guest_can_use(vcpu, X86_FEATURE_LAM))
+ return 0;
+
+ return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
+}
+
static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
{
u64 root_hpa = vcpu->arch.mmu->root.hpa;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0b1f991b9a31..2d6cdeab1f8a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -271,15 +271,11 @@ static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,
static inline bool kvm_available_flush_remote_tlbs_range(void)
{
+#if IS_ENABLED(CONFIG_HYPERV)
return kvm_x86_ops.flush_remote_tlbs_range;
-}
-
-int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
-{
- if (!kvm_x86_ops.flush_remote_tlbs_range)
- return -EOPNOTSUPP;
-
- return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
+#else
+ return false;
+#endif
}
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@ -795,16 +791,26 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
return &slot->arch.lpage_info[level - 2][idx];
}
+/*
+ * The most significant bit in disallow_lpage tracks whether or not memory
+ * attributes are mixed, i.e. not identical for all gfns at the current level.
+ * The lower order bits are used to refcount other cases where a hugepage is
+ * disallowed, e.g. if KVM is shadowing a page table at the gfn.
+ */
+#define KVM_LPAGE_MIXED_FLAG BIT(31)
+
static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
gfn_t gfn, int count)
{
struct kvm_lpage_info *linfo;
- int i;
+ int old, i;
for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
linfo = lpage_info_slot(gfn, slot, i);
+
+ old = linfo->disallow_lpage;
linfo->disallow_lpage += count;
- WARN_ON_ONCE(linfo->disallow_lpage < 0);
+ WARN_ON_ONCE((old ^ linfo->disallow_lpage) & KVM_LPAGE_MIXED_FLAG);
}
}
@@ -1382,7 +1388,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
if (READ_ONCE(eager_page_split))
- kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
+ kvm_mmu_try_split_huge_pages(kvm, slot, start, end + 1, PG_LEVEL_4K);
kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
@@ -2840,9 +2846,9 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
/*
* Recheck after taking the spinlock, a different vCPU
* may have since marked the page unsync. A false
- * positive on the unprotected check above is not
+ * negative on the unprotected check above is not
* possible as clearing sp->unsync _must_ hold mmu_lock
- * for write, i.e. unsync cannot transition from 0->1
+ * for write, i.e. unsync cannot transition from 1->0
* while this CPU holds mmu_lock for read (or write).
*/
if (READ_ONCE(sp->unsync))
@@ -3056,7 +3062,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
*
* There are several ways to safely use this helper:
*
- * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
+ * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
* consuming it. In this case, mmu_lock doesn't need to be held during the
* lookup, but it does need to be held while checking the MMU notifier.
*
@@ -3137,9 +3143,9 @@ out:
return level;
}
-int kvm_mmu_max_mapping_level(struct kvm *kvm,
- const struct kvm_memory_slot *slot, gfn_t gfn,
- int max_level)
+static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
+ const struct kvm_memory_slot *slot,
+ gfn_t gfn, int max_level, bool is_private)
{
struct kvm_lpage_info *linfo;
int host_level;
@@ -3151,6 +3157,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
break;
}
+ if (is_private)
+ return max_level;
+
if (max_level == PG_LEVEL_4K)
return PG_LEVEL_4K;
@@ -3158,6 +3167,16 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
return min(host_level, max_level);
}
+int kvm_mmu_max_mapping_level(struct kvm *kvm,
+ const struct kvm_memory_slot *slot, gfn_t gfn,
+ int max_level)
+{
+ bool is_private = kvm_slot_can_be_private(slot) &&
+ kvm_mem_is_private(kvm, gfn);
+
+ return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private);
+}
+
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
struct kvm_memory_slot *slot = fault->slot;
@@ -3178,8 +3197,9 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
* Enforce the iTLB multihit workaround after capturing the requested
* level, which will be used to do precise, accurate accounting.
*/
- fault->req_level = kvm_mmu_max_mapping_level(vcpu->kvm, slot,
- fault->gfn, fault->max_level);
+ fault->req_level = __kvm_mmu_max_mapping_level(vcpu->kvm, slot,
+ fault->gfn, fault->max_level,
+ fault->is_private);
if (fault->req_level == PG_LEVEL_4K || fault->huge_page_disallowed)
return;
@@ -3556,7 +3576,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
return;
if (is_tdp_mmu_page(sp))
- kvm_tdp_mmu_put_root(kvm, sp, false);
+ kvm_tdp_mmu_put_root(kvm, sp);
else if (!--sp->root_count && sp->role.invalid)
kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
@@ -3739,7 +3759,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
kvm_page_track_write_tracking_enabled(kvm))
goto out_success;
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(slot, bkt, slots) {
/*
@@ -3782,7 +3802,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
hpa_t root;
root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
- root_gfn = root_pgd >> PAGE_SHIFT;
+ root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
mmu->root.hpa = kvm_mmu_get_dummy_root();
@@ -4259,6 +4279,55 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
}
+static inline u8 kvm_max_level_for_order(int order)
+{
+ BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
+
+ KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
+ order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
+ order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
+
+ if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
+ return PG_LEVEL_1G;
+
+ if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
+ return PG_LEVEL_2M;
+
+ return PG_LEVEL_4K;
+}
+
+static void kvm_mmu_prepare_memory_fault_exit(struct kvm_vcpu *vcpu,
+ struct kvm_page_fault *fault)
+{
+ kvm_prepare_memory_fault_exit(vcpu, fault->gfn << PAGE_SHIFT,
+ PAGE_SIZE, fault->write, fault->exec,
+ fault->is_private);
+}
+
+static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
+ struct kvm_page_fault *fault)
+{
+ int max_order, r;
+
+ if (!kvm_slot_can_be_private(fault->slot)) {
+ kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+ return -EFAULT;
+ }
+
+ r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
+ &max_order);
+ if (r) {
+ kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+ return r;
+ }
+
+ fault->max_level = min(kvm_max_level_for_order(max_order),
+ fault->max_level);
+ fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
+
+ return RET_PF_CONTINUE;
+}
+
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
struct kvm_memory_slot *slot = fault->slot;
@@ -4291,6 +4360,14 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
return RET_PF_EMULATE;
}
+ if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
+ kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
+ return -EFAULT;
+ }
+
+ if (fault->is_private)
+ return kvm_faultin_pfn_private(vcpu, fault);
+
async = false;
fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
fault->write, &fault->map_writable,
@@ -4366,7 +4443,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
return true;
return fault->slot &&
- mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva);
+ mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
}
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -6228,7 +6305,7 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
if (!kvm_memslots_have_rmaps(kvm))
return flush;
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
@@ -6260,7 +6337,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
write_lock(&kvm->mmu_lock);
- kvm_mmu_invalidate_begin(kvm, 0, -1ul);
+ kvm_mmu_invalidate_begin(kvm);
+
+ kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end);
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
@@ -6270,7 +6349,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
if (flush)
kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
- kvm_mmu_invalidate_end(kvm, 0, -1ul);
+ kvm_mmu_invalidate_end(kvm);
write_unlock(&kvm->mmu_lock);
}
@@ -6723,7 +6802,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
* modifier prior to checking for a wrap of the MMIO generation so
* that a wrap in any address space is detected.
*/
- gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1);
+ gen &= ~((u64)kvm_arch_nr_memslot_as_ids(kvm) - 1);
/*
* The very rare case: if the MMIO generation number has wrapped,
@@ -7176,3 +7255,163 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
if (kvm->arch.nx_huge_page_recovery_thread)
kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
}
+
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
+ struct kvm_gfn_range *range)
+{
+ /*
+ * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only
+ * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM
+ * can simply ignore such slots. But if userspace is making memory
+ * PRIVATE, then KVM must prevent the guest from accessing the memory
+ * as shared. And if userspace is making memory SHARED and this point
+ * is reached, then at least one page within the range was previously
+ * PRIVATE, i.e. the slot's possible hugepage ranges are changing.
+ * Zapping SPTEs in this case ensures KVM will reassess whether or not
+ * a hugepage can be used for affected ranges.
+ */
+ if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
+ return false;
+
+ return kvm_unmap_gfn_range(kvm, range);
+}
+
+static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+ int level)
+{
+ return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG;
+}
+
+static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+ int level)
+{
+ lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG;
+}
+
+static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn,
+ int level)
+{
+ lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG;
+}
+
+static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, int level, unsigned long attrs)
+{
+ const unsigned long start = gfn;
+ const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
+
+ if (level == PG_LEVEL_2M)
+ return kvm_range_has_memory_attributes(kvm, start, end, attrs);
+
+ for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
+ if (hugepage_test_mixed(slot, gfn, level - 1) ||
+ attrs != kvm_get_memory_attributes(kvm, gfn))
+ return false;
+ }
+ return true;
+}
+
+bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
+ struct kvm_gfn_range *range)
+{
+ unsigned long attrs = range->arg.attributes;
+ struct kvm_memory_slot *slot = range->slot;
+ int level;
+
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ lockdep_assert_held(&kvm->slots_lock);
+
+ /*
+ * Calculate which ranges can be mapped with hugepages even if the slot
+ * can't map memory PRIVATE. KVM mustn't create a SHARED hugepage over
+ * a range that has PRIVATE GFNs, and conversely converting a range to
+ * SHARED may now allow hugepages.
+ */
+ if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
+ return false;
+
+ /*
+ * The sequence matters here: upper levels consume the result of lower
+ * level's scanning.
+ */
+ for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+ gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
+ gfn_t gfn = gfn_round_for_level(range->start, level);
+
+ /* Process the head page if it straddles the range. */
+ if (gfn != range->start || gfn + nr_pages > range->end) {
+ /*
+ * Skip mixed tracking if the aligned gfn isn't covered
+ * by the memslot; KVM can't use a hugepage due to the
+ * misaligned address regardless of memory attributes.
+ */
+ if (gfn >= slot->base_gfn) {
+ if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
+ hugepage_clear_mixed(slot, gfn, level);
+ else
+ hugepage_set_mixed(slot, gfn, level);
+ }
+ gfn += nr_pages;
+ }
+
+ /*
+ * Pages entirely covered by the range are guaranteed to have
+ * only the attributes which were just set.
+ */
+ for ( ; gfn + nr_pages <= range->end; gfn += nr_pages)
+ hugepage_clear_mixed(slot, gfn, level);
+
+ /*
+ * Process the last tail page if it straddles the range and is
+ * contained by the memslot. Like the head page, KVM can't
+ * create a hugepage if the slot size is misaligned.
+ */
+ if (gfn < range->end &&
+ (gfn + nr_pages) <= (slot->base_gfn + slot->npages)) {
+ if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
+ hugepage_clear_mixed(slot, gfn, level);
+ else
+ hugepage_set_mixed(slot, gfn, level);
+ }
+ }
+ return false;
+}
+
+void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
+{
+ int level;
+
+ if (!kvm_arch_has_private_mem(kvm))
+ return;
+
+ for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+ /*
+ * Don't bother tracking mixed attributes for pages that can't
+ * be huge due to alignment, i.e. process only pages that are
+ * entirely contained by the memslot.
+ */
+ gfn_t end = gfn_round_for_level(slot->base_gfn + slot->npages, level);
+ gfn_t start = gfn_round_for_level(slot->base_gfn, level);
+ gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level);
+ gfn_t gfn;
+
+ if (start < slot->base_gfn)
+ start += nr_pages;
+
+ /*
+ * Unlike setting attributes, every potential hugepage needs to
+ * be manually checked as the attributes may already be mixed.
+ */
+ for (gfn = start; gfn < end; gfn += nr_pages) {
+ unsigned long attrs = kvm_get_memory_attributes(kvm, gfn);
+
+ if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
+ hugepage_clear_mixed(slot, gfn, level);
+ else
+ hugepage_set_mixed(slot, gfn, level);
+ }
+ }
+}
+#endif
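/*
 * Illustrative sketch, not part of the change above: assuming PG_LEVEL_2M
 * (512 GFNs per hugepage) and an attribute update covering GFNs
 * [0x300, 0x900), the walk splits the range into a head hugepage at 0x200
 * (straddles the start, re-scanned via hugepage_has_attrs()), fully covered
 * hugepages at 0x400 and 0x600 (unconditionally cleared), and a tail
 * hugepage at 0x800 (straddles the end, re-scanned), with the head/tail
 * processed only when the aligned hugepage is covered by the memslot.
 */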
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index decc1f153669..0669a8a668ca 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -13,6 +13,7 @@
#endif
/* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
+#define __PT_BASE_ADDR_MASK GENMASK_ULL(51, 12)
#define __PT_LEVEL_SHIFT(level, bits_per_level) \
(PAGE_SHIFT + ((level) - 1) * (bits_per_level))
#define __PT_INDEX(address, level, bits_per_level) \
@@ -201,6 +202,7 @@ struct kvm_page_fault {
/* Derived from mmu and global state. */
const bool is_tdp;
+ const bool is_private;
const bool nx_huge_page_workaround_enabled;
/*
@@ -296,6 +298,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
.max_level = KVM_MAX_HUGEPAGE_LEVEL,
.req_level = PG_LEVEL_4K,
.goal_level = PG_LEVEL_4K,
+ .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
};
int r;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index c85255073f67..4d4e98fe4f35 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -62,7 +62,7 @@
#endif
/* Common logic, but per-type values. These also need to be undefined. */
-#define PT_BASE_ADDR_MASK ((pt_element_t)(((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)))
+#define PT_BASE_ADDR_MASK ((pt_element_t)__PT_BASE_ADDR_MASK)
#define PT_LVL_ADDR_MASK(lvl) __PT_LVL_ADDR_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
#define PT_LVL_OFFSET_MASK(lvl) __PT_LVL_OFFSET_MASK(PT_BASE_ADDR_MASK, lvl, PT_LEVEL_BITS)
#define PT_INDEX(addr, lvl) __PT_INDEX(addr, lvl, PT_LEVEL_BITS)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6cd4dd631a2f..6ae19b4ee5b1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -73,11 +73,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
tdp_mmu_free_sp(sp);
}
-void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
- bool shared)
+void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
- kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
return;
@@ -106,10 +103,16 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
*/
static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
struct kvm_mmu_page *prev_root,
- bool shared, bool only_valid)
+ bool only_valid)
{
struct kvm_mmu_page *next_root;
+ /*
+ * While the roots themselves are RCU-protected, fields such as
+ * role.invalid are protected by mmu_lock.
+ */
+ lockdep_assert_held(&kvm->mmu_lock);
+
rcu_read_lock();
if (prev_root)
@@ -132,7 +135,7 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
rcu_read_unlock();
if (prev_root)
- kvm_tdp_mmu_put_root(kvm, prev_root, shared);
+ kvm_tdp_mmu_put_root(kvm, prev_root);
return next_root;
}
@@ -144,26 +147,22 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
* recent root. (Unless keeping a live reference is desirable.)
*
* If shared is set, this function is operating under the MMU lock in read
- * mode. In the unlikely event that this thread must free a root, the lock
- * will be temporarily dropped and reacquired in write mode.
+ * mode.
*/
-#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\
- for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid); \
- _root; \
- _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid)) \
- if (kvm_lockdep_assert_mmu_lock_held(_kvm, _shared) && \
- kvm_mmu_page_as_id(_root) != _as_id) { \
+#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)\
+ for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid); \
+ ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
+ _root = tdp_mmu_next_root(_kvm, _root, _only_valid)) \
+ if (kvm_mmu_page_as_id(_root) != _as_id) { \
} else
-#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
- __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
+#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \
+ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, true)
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \
- for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \
- _root; \
- _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \
- if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \
- } else
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \
+ for (_root = tdp_mmu_next_root(_kvm, NULL, false); \
+ ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
+ _root = tdp_mmu_next_root(_kvm, _root, false))
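/*
 * Illustrative note on the rewritten macros above: the loop condition uses
 * the comma operator, "({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root",
 * so the lockdep assertion is evaluated for its side effect on every
 * iteration while the condition that terminates the loop remains simply
 * "_root".
 */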
/*
* Iterate over all TDP MMU roots. Requires that mmu_lock be held for write,
@@ -276,28 +275,18 @@ static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
*
* @kvm: kvm instance
* @sp: the page to be removed
- * @shared: This operation may not be running under the exclusive use of
- * the MMU lock and the operation must synchronize with other
- * threads that might be adding or removing pages.
*/
-static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
- bool shared)
+static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
tdp_unaccount_mmu_page(kvm, sp);
if (!sp->nx_huge_page_disallowed)
return;
- if (shared)
- spin_lock(&kvm->arch.tdp_mmu_pages_lock);
- else
- lockdep_assert_held_write(&kvm->mmu_lock);
-
+ spin_lock(&kvm->arch.tdp_mmu_pages_lock);
sp->nx_huge_page_disallowed = false;
untrack_possible_nx_huge_page(kvm, sp);
-
- if (shared)
- spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+ spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
/**
@@ -326,7 +315,7 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
trace_kvm_mmu_prepare_zap_page(sp);
- tdp_mmu_unlink_sp(kvm, sp, shared);
+ tdp_mmu_unlink_sp(kvm, sp);
for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
tdp_ptep_t sptep = pt + i;
@@ -832,7 +821,8 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
{
struct kvm_mmu_page *root;
- for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ for_each_tdp_mmu_root_yield_safe(kvm, root)
flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
return flush;
@@ -854,7 +844,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
* is being destroyed or the userspace VMM has exited. In both cases,
* KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
*/
- for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ lockdep_assert_held_write(&kvm->mmu_lock);
+ for_each_tdp_mmu_root_yield_safe(kvm, root)
tdp_mmu_zap_root(kvm, root, false);
}
@@ -868,7 +859,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
read_lock(&kvm->mmu_lock);
- for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
if (!root->tdp_mmu_scheduled_root_to_zap)
continue;
@@ -891,7 +882,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
* the root must be reachable by mmu_notifiers while it's being
* zapped
*/
- kvm_tdp_mmu_put_root(kvm, root, true);
+ kvm_tdp_mmu_put_root(kvm, root);
}
read_unlock(&kvm->mmu_lock);
@@ -1125,7 +1116,7 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
{
struct kvm_mmu_page *root;
- __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
+ __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
range->may_block, flush);
@@ -1314,7 +1305,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
lockdep_assert_held_read(&kvm->mmu_lock);
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages, min_level);
@@ -1346,6 +1337,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
{
struct kvm_mmu_page *sp;
+ kvm_lockdep_assert_mmu_lock_held(kvm, shared);
+
/*
* Since we are allocating while under the MMU lock we have to be
* careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
@@ -1496,11 +1489,10 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
int r = 0;
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, shared) {
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) {
r = tdp_mmu_split_huge_pages_root(kvm, root, start, end, target_level, shared);
if (r) {
- kvm_tdp_mmu_put_root(kvm, root, shared);
+ kvm_tdp_mmu_put_root(kvm, root);
break;
}
}
@@ -1522,12 +1514,13 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
rcu_read_lock();
- tdp_root_for_each_leaf_pte(iter, root, start, end) {
+ tdp_root_for_each_pte(iter, root, start, end) {
retry:
- if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+ if (!is_shadow_present_pte(iter.old_spte) ||
+ !is_last_spte(iter.old_spte, iter.level))
continue;
- if (!is_shadow_present_pte(iter.old_spte))
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
KVM_MMU_WARN_ON(kvm_ad_enabled() &&
@@ -1560,8 +1553,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
bool spte_set = false;
lockdep_assert_held_read(&kvm->mmu_lock);
-
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
@@ -1695,8 +1687,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
struct kvm_mmu_page *root;
lockdep_assert_held_read(&kvm->mmu_lock);
-
- for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+ for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id)
zap_collapsible_spte_range(kvm, root, slot);
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 733a3aef3a96..20d97aa46c49 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -17,8 +17,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
return refcount_inc_not_zero(&root->tdp_mmu_root_count);
}
-void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
- bool shared);
+void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root);
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 9ae07db6f0f6..87cc6c8809ad 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -127,9 +127,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
/*
- * Ignore overflow events for counters that are scheduled to be
- * reprogrammed, e.g. if a PMI for the previous event races with KVM's
- * handling of a related guest WRMSR.
+ * Ignore asynchronous overflow events for counters that are scheduled
+ * to be reprogrammed, e.g. if a PMI for the previous event races with
+ * KVM's handling of a related guest WRMSR.
*/
if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
return;
@@ -161,6 +161,15 @@ static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc)
return 1;
}
+static u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
+{
+ u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
+
+ if (!sample_period)
+ sample_period = pmc_bitmask(pmc) + 1;
+ return sample_period;
+}
+
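/*
 * Illustrative example for the helper above: with a 48-bit counter,
 * pmc_bitmask(pmc) == 0xffffffffffff. A guest that wants a PMI after 0x100
 * events programs the counter to 0xffffffffff00, and
 * (-0xffffffffff00) & 0xffffffffffff == 0x100 becomes the perf sample
 * period; a counter value of 0 maps to a full wrap of pmc_bitmask(pmc) + 1.
 */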
static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
bool exclude_user, bool exclude_kernel,
bool intr)
@@ -215,17 +224,30 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
return 0;
}
-static void pmc_pause_counter(struct kvm_pmc *pmc)
+static bool pmc_pause_counter(struct kvm_pmc *pmc)
{
u64 counter = pmc->counter;
-
- if (!pmc->perf_event || pmc->is_paused)
- return;
+ u64 prev_counter;
/* update counter, reset event value to avoid redundant accumulation */
- counter += perf_event_pause(pmc->perf_event, true);
+ if (pmc->perf_event && !pmc->is_paused)
+ counter += perf_event_pause(pmc->perf_event, true);
+
+ /*
+ * Snapshot the previous counter *after* accumulating state from perf.
+ * If overflow already happened, hardware (via perf) is responsible for
+ * generating a PMI. KVM just needs to detect overflow on emulated
+ * counter events that haven't yet been processed.
+ */
+ prev_counter = counter & pmc_bitmask(pmc);
+
+ counter += pmc->emulated_counter;
pmc->counter = counter & pmc_bitmask(pmc);
+
+ pmc->emulated_counter = 0;
pmc->is_paused = true;
+
+ return pmc->counter < prev_counter;
}
static bool pmc_resume_counter(struct kvm_pmc *pmc)
@@ -250,6 +272,51 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
return true;
}
+static void pmc_release_perf_event(struct kvm_pmc *pmc)
+{
+ if (pmc->perf_event) {
+ perf_event_release_kernel(pmc->perf_event);
+ pmc->perf_event = NULL;
+ pmc->current_config = 0;
+ pmc_to_pmu(pmc)->event_count--;
+ }
+}
+
+static void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+ if (pmc->perf_event) {
+ pmc->counter = pmc_read_counter(pmc);
+ pmc_release_perf_event(pmc);
+ }
+}
+
+static void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+ if (!pmc->perf_event || pmc->is_paused ||
+ !is_sampling_event(pmc->perf_event))
+ return;
+
+ perf_event_period(pmc->perf_event,
+ get_sample_period(pmc, pmc->counter));
+}
+
+void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+ /*
+ * Drop any unconsumed accumulated counts; the WRMSR is a write, not a
+ * read-modify-write. Adjust the counter value so that its value is
+ * relative to the current count, as reading the current count from
+ * perf is faster than pausing and reprogramming the event in order to
+ * reset it to '0'. Note, this very sneakily offsets the accumulated
+ * emulated count too, by using pmc_read_counter()!
+ */
+ pmc->emulated_counter = 0;
+ pmc->counter += val - pmc_read_counter(pmc);
+ pmc->counter &= pmc_bitmask(pmc);
+ pmc_update_sample_period(pmc);
+}
+EXPORT_SYMBOL_GPL(pmc_write_counter);
+
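/*
 * Illustrative example (hypothetical numbers): with pmc->counter == 100, a
 * live perf delta of 20 and pmc->emulated_counter == 5, a guest write of 200
 * first drops the emulated events, then pmc_read_counter() returns 120 and
 * pmc->counter becomes 100 + (200 - 120) = 180; a later read yields
 * 180 + 20 == 200, i.e. the written value, without reprogramming the perf
 * event.
 */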
static int filter_cmp(const void *pa, const void *pb, u64 mask)
{
u64 a = *(u64 *)pa & mask;
@@ -383,14 +450,15 @@ static void reprogram_counter(struct kvm_pmc *pmc)
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
u64 eventsel = pmc->eventsel;
u64 new_config = eventsel;
+ bool emulate_overflow;
u8 fixed_ctr_ctrl;
- pmc_pause_counter(pmc);
+ emulate_overflow = pmc_pause_counter(pmc);
if (!pmc_event_is_allowed(pmc))
goto reprogram_complete;
- if (pmc->counter < pmc->prev_counter)
+ if (emulate_overflow)
__kvm_perf_overflow(pmc, false);
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
@@ -430,7 +498,6 @@ static void reprogram_counter(struct kvm_pmc *pmc)
reprogram_complete:
clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
- pmc->prev_counter = 0;
}
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
@@ -639,32 +706,60 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
-/* refresh PMU settings. This function generally is called when underlying
- * settings are changed (such as changes of PMU CPUID by guest VMs), which
- * should rarely happen.
+static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+ int i;
+
+ pmu->need_cleanup = false;
+
+ bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
+
+ for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+ pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+ if (!pmc)
+ continue;
+
+ pmc_stop_counter(pmc);
+ pmc->counter = 0;
+ pmc->emulated_counter = 0;
+
+ if (pmc_is_gp(pmc))
+ pmc->eventsel = 0;
+ }
+
+ pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
+
+ static_call_cond(kvm_x86_pmu_reset)(vcpu);
+}
+
+
+/*
+ * Refresh the PMU configuration for the vCPU, e.g. if userspace changes CPUID
+ * and/or PERF_CAPABILITIES.
*/
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
return;
+ /*
+ * Stop/release all existing counters/events before realizing the new
+ * vPMU model.
+ */
+ kvm_pmu_reset(vcpu);
+
bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
static_call(kvm_x86_pmu_refresh)(vcpu);
}
-void kvm_pmu_reset(struct kvm_vcpu *vcpu)
-{
- static_call(kvm_x86_pmu_reset)(vcpu);
-}
-
void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
memset(pmu, 0, sizeof(*pmu));
static_call(kvm_x86_pmu_init)(vcpu);
- pmu->event_count = 0;
- pmu->need_cleanup = false;
kvm_pmu_refresh(vcpu);
}
@@ -700,8 +795,7 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
- pmc->prev_counter = pmc->counter;
- pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+ pmc->emulated_counter++;
kvm_pmu_request_counter_reprogram(pmc);
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 1d64113de488..7caeb3d8d4fd 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -66,7 +66,8 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
{
u64 counter, enabled, running;
- counter = pmc->counter;
+ counter = pmc->counter + pmc->emulated_counter;
+
if (pmc->perf_event && !pmc->is_paused)
counter += perf_event_read_value(pmc->perf_event,
&enabled, &running);
@@ -74,29 +75,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
return counter & pmc_bitmask(pmc);
}
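/*
 * Illustrative note: the value returned above is the sum of three parts,
 * i.e. the last snapshotted counter, software-emulated increments that
 * haven't been folded in yet, and the live delta from the running perf
 * event, truncated to the counter's width by pmc_bitmask().
 */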
-static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
-{
- pmc->counter += val - pmc_read_counter(pmc);
- pmc->counter &= pmc_bitmask(pmc);
-}
-
-static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
-{
- if (pmc->perf_event) {
- perf_event_release_kernel(pmc->perf_event);
- pmc->perf_event = NULL;
- pmc->current_config = 0;
- pmc_to_pmu(pmc)->event_count--;
- }
-}
-
-static inline void pmc_stop_counter(struct kvm_pmc *pmc)
-{
- if (pmc->perf_event) {
- pmc->counter = pmc_read_counter(pmc);
- pmc_release_perf_event(pmc);
- }
-}
+void pmc_write_counter(struct kvm_pmc *pmc, u64 val);
static inline bool pmc_is_gp(struct kvm_pmc *pmc)
{
@@ -146,25 +125,6 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
return NULL;
}
-static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
-{
- u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
-
- if (!sample_period)
- sample_period = pmc_bitmask(pmc) + 1;
- return sample_period;
-}
-
-static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
-{
- if (!pmc->perf_event || pmc->is_paused ||
- !is_sampling_event(pmc->perf_event))
- return;
-
- perf_event_period(pmc->perf_event,
- get_sample_period(pmc, pmc->counter));
-}
-
static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -261,7 +221,6 @@ bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
-void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index b81650678375..aadefcaa9561 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -16,6 +16,7 @@ enum kvm_only_cpuid_leafs {
CPUID_7_1_EDX,
CPUID_8000_0007_EDX,
CPUID_8000_0022_EAX,
+ CPUID_7_2_EDX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -46,6 +47,14 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
+/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */
+#define X86_FEATURE_INTEL_PSFD KVM_X86_FEATURE(CPUID_7_2_EDX, 0)
+#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
+#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
+#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
+#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
+
/* CPUID level 0x80000007 (EDX). */
#define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8)
@@ -80,6 +89,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0007_EDX] = {0x80000007, 0, CPUID_EDX},
[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
+ [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
};
/*
@@ -106,18 +116,19 @@ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
*/
static __always_inline u32 __feature_translate(int x86_feature)
{
- if (x86_feature == X86_FEATURE_SGX1)
- return KVM_X86_FEATURE_SGX1;
- else if (x86_feature == X86_FEATURE_SGX2)
- return KVM_X86_FEATURE_SGX2;
- else if (x86_feature == X86_FEATURE_SGX_EDECCSSA)
- return KVM_X86_FEATURE_SGX_EDECCSSA;
- else if (x86_feature == X86_FEATURE_CONSTANT_TSC)
- return KVM_X86_FEATURE_CONSTANT_TSC;
- else if (x86_feature == X86_FEATURE_PERFMON_V2)
- return KVM_X86_FEATURE_PERFMON_V2;
-
- return x86_feature;
+#define KVM_X86_TRANSLATE_FEATURE(f) \
+ case X86_FEATURE_##f: return KVM_X86_FEATURE_##f
+
+ switch (x86_feature) {
+ KVM_X86_TRANSLATE_FEATURE(SGX1);
+ KVM_X86_TRANSLATE_FEATURE(SGX2);
+ KVM_X86_TRANSLATE_FEATURE(SGX_EDECCSSA);
+ KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
+ KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
+ KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ default:
+ return x86_feature;
+ }
}
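/*
 * Illustrative note: each KVM_X86_TRANSLATE_FEATURE(f) invocation above
 * expands to "case X86_FEATURE_<f>: return KVM_X86_FEATURE_<f>;", e.g.
 * KVM_X86_TRANSLATE_FEATURE(SGX1) becomes
 * "case X86_FEATURE_SGX1: return KVM_X86_FEATURE_SGX1;"; features without a
 * KVM-only alias fall through to the default case unmodified.
 */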
static __always_inline u32 __feature_leaf(int x86_feature)
diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h
index 02f4784b5d44..d3f8bfc05832 100644
--- a/arch/x86/kvm/svm/hyperv.h
+++ b/arch/x86/kvm/svm/hyperv.h
@@ -11,6 +11,7 @@
#include "../hyperv.h"
#include "svm.h"
+#ifdef CONFIG_KVM_HYPERV
static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -41,5 +42,13 @@ static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
}
void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+#else /* CONFIG_KVM_HYPERV */
+static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
+static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+static inline void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) {}
+#endif /* CONFIG_KVM_HYPERV */
#endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3fea8c47679e..dee62362a360 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -187,7 +187,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
*/
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
- struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
int i;
/*
@@ -198,11 +197,16 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
* - Nested hypervisor (L1) is using Hyper-V emulation interface and
* tells KVM (L0) there were no changes in MSR bitmap for L2.
*/
- if (!svm->nested.force_msr_bitmap_recalc &&
- kvm_hv_hypercall_enabled(&svm->vcpu) &&
- hve->hv_enlightenments_control.msr_bitmap &&
- (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
- goto set_msrpm_base_pa;
+#ifdef CONFIG_KVM_HYPERV
+ if (!svm->nested.force_msr_bitmap_recalc) {
+ struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
+
+ if (kvm_hv_hypercall_enabled(&svm->vcpu) &&
+ hve->hv_enlightenments_control.msr_bitmap &&
+ (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
+ goto set_msrpm_base_pa;
+ }
+#endif
if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
return true;
@@ -230,7 +234,9 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
svm->nested.force_msr_bitmap_recalc = false;
+#ifdef CONFIG_KVM_HYPERV
set_msrpm_base_pa:
+#endif
svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
return true;
@@ -247,18 +253,6 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}
-static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
-{
- /* Nested FLUSHBYASID is not supported yet. */
- switch(tlb_ctl) {
- case TLB_CONTROL_DO_NOTHING:
- case TLB_CONTROL_FLUSH_ALL_ASID:
- return true;
- default:
- return false;
- }
-}
-
static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
struct vmcb_ctrl_area_cached *control)
{
@@ -278,9 +272,6 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
IOPM_SIZE)))
return false;
- if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
- return false;
-
if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
!vmcb12_is_intercept(control, INTERCEPT_NMI))) {
return false;
@@ -311,7 +302,7 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
if (CC(!(save->cr4 & X86_CR4_PAE)) ||
CC(!(save->cr0 & X86_CR0_PE)) ||
- CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
+ CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
return false;
}
@@ -378,12 +369,14 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
to->msrpm_base_pa &= ~0x0fffULL;
to->iopm_base_pa &= ~0x0fffULL;
+#ifdef CONFIG_KVM_HYPERV
/* Hyper-V extensions (Enlightened VMCB) */
if (kvm_hv_hypercall_enabled(vcpu)) {
to->clean = from->clean;
memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
sizeof(to->hv_enlightenments));
}
+#endif
}
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
@@ -487,14 +480,8 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
{
- /*
- * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
- * L2's VP_ID upon request from the guest. Make sure we check for
- * pending entries in the right FIFO upon L1/L2 transition as these
- * requests are put by other vCPUs asynchronously.
- */
- if (to_hv_vcpu(vcpu) && npt_enabled)
- kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+ /* Handle pending Hyper-V TLB flush requests */
+ kvm_hv_nested_transtion_tlb_flush(vcpu, npt_enabled);
/*
* TODO: optimize unconditional TLB flush/MMU sync. A partial list of
@@ -520,7 +507,7 @@ static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
bool nested_npt, bool reload_pdptrs)
{
- if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
+ if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3)))
return -EINVAL;
if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 373ff6a6687b..b6a7ad4d6914 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -161,7 +161,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
pmc_write_counter(pmc, data);
- pmc_update_sample_period(pmc);
return 0;
}
/* MSR_EVNTSELn */
@@ -233,21 +232,6 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
}
}
-static void amd_pmu_reset(struct kvm_vcpu *vcpu)
-{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- int i;
-
- for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
- struct kvm_pmc *pmc = &pmu->gp_counters[i];
-
- pmc_stop_counter(pmc);
- pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
- }
-
- pmu->global_ctrl = pmu->global_status = 0;
-}
-
struct kvm_pmu_ops amd_pmu_ops __initdata = {
.hw_event_available = amd_hw_event_available,
.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
@@ -259,7 +243,6 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
.set_msr = amd_pmu_set_msr,
.refresh = amd_pmu_refresh,
.init = amd_pmu_init,
- .reset = amd_pmu_reset,
.EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
.MAX_NR_GP_COUNTERS = KVM_AMD_PMC_MAX_GENERIC,
.MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6ee925d66648..f760106c31f8 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2191,10 +2191,13 @@ void __init sev_hardware_setup(void)
/*
* SEV must obviously be supported in hardware. Sanity check that the
* CPU supports decode assists, which is mandatory for SEV guests to
- * support instruction emulation.
+ * support instruction emulation. Ditto for flushing by ASID, as SEV
+ * guests are bound to a single ASID, i.e. KVM can't rotate to a new
+ * ASID to effect a TLB flush.
*/
if (!boot_cpu_has(X86_FEATURE_SEV) ||
- WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
+ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) ||
+ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID)))
goto out;
/* Retrieve SEV CPUID information */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7fb51424fc74..e90b429c84f1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3563,8 +3563,15 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
if (svm->nmi_l1_to_l2)
return;
- svm->nmi_masked = true;
- svm_set_iret_intercept(svm);
+ /*
+ * No need to manually track NMI masking when vNMI is enabled, hardware
+ * automatically sets V_NMI_BLOCKING_MASK as appropriate, including the
+ * case where software directly injects an NMI.
+ */
+ if (!is_vnmi_enabled(svm)) {
+ svm->nmi_masked = true;
+ svm_set_iret_intercept(svm);
+ }
++vcpu->stat.nmi_injections;
}
@@ -5079,6 +5086,13 @@ static __init void svm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_SVM);
kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
+ /*
+ * KVM currently flushes TLBs on *every* nested SVM transition,
+ * and so for all intents and purposes KVM supports flushing by
+ * ASID, i.e. KVM is guaranteed to honor every L1 ASID flush.
+ */
+ kvm_cpu_cap_set(X86_FEATURE_FLUSHBYASID);
+
if (nrips)
kvm_cpu_cap_set(X86_FEATURE_NRIPS);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index c409f934c377..8ef95139cd24 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -148,7 +148,9 @@ struct vmcb_ctrl_area_cached {
u64 virt_ext;
u32 clean;
union {
+#if IS_ENABLED(CONFIG_HYPERV) || IS_ENABLED(CONFIG_KVM_HYPERV)
struct hv_vmcb_enlightenments hv_enlightenments;
+#endif
u8 reserved_sw[32];
};
};
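/*
 * Presumed intent (assumption, not stated in the patch): keeping
 * reserved_sw[32] as the unconditional union member means the cached control
 * area retains the same size and layout whether or not
 * hv_vmcb_enlightenments is compiled in.
 */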
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
index 7af8422d3382..3971b3ea5d04 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.c
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -18,18 +18,14 @@
int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
{
struct hv_vmcb_enlightenments *hve;
- struct hv_partition_assist_pg **p_hv_pa_pg =
- &to_kvm_hv(vcpu->kvm)->hv_pa_pg;
+ hpa_t partition_assist_page = hv_get_partition_assist_page(vcpu);
- if (!*p_hv_pa_pg)
- *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (!*p_hv_pa_pg)
+ if (partition_assist_page == INVALID_PAGE)
return -ENOMEM;
hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
- hve->partition_assist_page = __pa(*p_hv_pa_pg);
+ hve->partition_assist_page = partition_assist_page;
hve->hv_vm_id = (unsigned long)vcpu->kvm;
if (!hve->hv_enlightenments_control.nested_flush_hypercall) {
hve->hv_enlightenments_control.nested_flush_hypercall = 1;
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index ef2ebabb059c..9499f9c6b077 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -270,16 +270,16 @@ SYM_FUNC_START(__svm_vcpu_run)
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY
-10: cmpb $0, kvm_rebooting
+10: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
ud2
-30: cmpb $0, kvm_rebooting
+30: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 4b
ud2
-50: cmpb $0, kvm_rebooting
+50: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 6b
ud2
-70: cmpb $0, kvm_rebooting
+70: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 8b
ud2
@@ -381,7 +381,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
RESTORE_GUEST_SPEC_CTRL_BODY
RESTORE_HOST_SPEC_CTRL_BODY
-3: cmpb $0, kvm_rebooting
+3: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
ud2
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index 313b8bb5b8a7..fab6a1ad98dc 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -13,419 +13,6 @@
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
-/*
- * Enlightened VMCSv1 doesn't support these:
- *
- * POSTED_INTR_NV = 0x00000002,
- * GUEST_INTR_STATUS = 0x00000810,
- * APIC_ACCESS_ADDR = 0x00002014,
- * POSTED_INTR_DESC_ADDR = 0x00002016,
- * EOI_EXIT_BITMAP0 = 0x0000201c,
- * EOI_EXIT_BITMAP1 = 0x0000201e,
- * EOI_EXIT_BITMAP2 = 0x00002020,
- * EOI_EXIT_BITMAP3 = 0x00002022,
- * GUEST_PML_INDEX = 0x00000812,
- * PML_ADDRESS = 0x0000200e,
- * VM_FUNCTION_CONTROL = 0x00002018,
- * EPTP_LIST_ADDRESS = 0x00002024,
- * VMREAD_BITMAP = 0x00002026,
- * VMWRITE_BITMAP = 0x00002028,
- *
- * TSC_MULTIPLIER = 0x00002032,
- * PLE_GAP = 0x00004020,
- * PLE_WINDOW = 0x00004022,
- * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
- *
- * Currently unsupported in KVM:
- * GUEST_IA32_RTIT_CTL = 0x00002814,
- */
-#define EVMCS1_SUPPORTED_PINCTRL \
- (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
- PIN_BASED_EXT_INTR_MASK | \
- PIN_BASED_NMI_EXITING | \
- PIN_BASED_VIRTUAL_NMIS)
-
-#define EVMCS1_SUPPORTED_EXEC_CTRL \
- (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
- CPU_BASED_HLT_EXITING | \
- CPU_BASED_CR3_LOAD_EXITING | \
- CPU_BASED_CR3_STORE_EXITING | \
- CPU_BASED_UNCOND_IO_EXITING | \
- CPU_BASED_MOV_DR_EXITING | \
- CPU_BASED_USE_TSC_OFFSETTING | \
- CPU_BASED_MWAIT_EXITING | \
- CPU_BASED_MONITOR_EXITING | \
- CPU_BASED_INVLPG_EXITING | \
- CPU_BASED_RDPMC_EXITING | \
- CPU_BASED_INTR_WINDOW_EXITING | \
- CPU_BASED_CR8_LOAD_EXITING | \
- CPU_BASED_CR8_STORE_EXITING | \
- CPU_BASED_RDTSC_EXITING | \
- CPU_BASED_TPR_SHADOW | \
- CPU_BASED_USE_IO_BITMAPS | \
- CPU_BASED_MONITOR_TRAP_FLAG | \
- CPU_BASED_USE_MSR_BITMAPS | \
- CPU_BASED_NMI_WINDOW_EXITING | \
- CPU_BASED_PAUSE_EXITING | \
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
-
-#define EVMCS1_SUPPORTED_2NDEXEC \
- (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
- SECONDARY_EXEC_WBINVD_EXITING | \
- SECONDARY_EXEC_ENABLE_VPID | \
- SECONDARY_EXEC_ENABLE_EPT | \
- SECONDARY_EXEC_UNRESTRICTED_GUEST | \
- SECONDARY_EXEC_DESC | \
- SECONDARY_EXEC_ENABLE_RDTSCP | \
- SECONDARY_EXEC_ENABLE_INVPCID | \
- SECONDARY_EXEC_ENABLE_XSAVES | \
- SECONDARY_EXEC_RDSEED_EXITING | \
- SECONDARY_EXEC_RDRAND_EXITING | \
- SECONDARY_EXEC_TSC_SCALING | \
- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
- SECONDARY_EXEC_PT_USE_GPA | \
- SECONDARY_EXEC_PT_CONCEAL_VMX | \
- SECONDARY_EXEC_BUS_LOCK_DETECTION | \
- SECONDARY_EXEC_NOTIFY_VM_EXITING | \
- SECONDARY_EXEC_ENCLS_EXITING)
-
-#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
-
-#define EVMCS1_SUPPORTED_VMEXIT_CTRL \
- (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \
- VM_EXIT_SAVE_DEBUG_CONTROLS | \
- VM_EXIT_ACK_INTR_ON_EXIT | \
- VM_EXIT_HOST_ADDR_SPACE_SIZE | \
- VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
- VM_EXIT_SAVE_IA32_PAT | \
- VM_EXIT_LOAD_IA32_PAT | \
- VM_EXIT_SAVE_IA32_EFER | \
- VM_EXIT_LOAD_IA32_EFER | \
- VM_EXIT_CLEAR_BNDCFGS | \
- VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMENTRY_CTRL \
- (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \
- VM_ENTRY_LOAD_DEBUG_CONTROLS | \
- VM_ENTRY_IA32E_MODE | \
- VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
- VM_ENTRY_LOAD_IA32_PAT | \
- VM_ENTRY_LOAD_IA32_EFER | \
- VM_ENTRY_LOAD_BNDCFGS | \
- VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
-
-#define EVMCS1_SUPPORTED_VMFUNC (0)
-
-#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
-#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
- {EVMCS1_OFFSET(name), clean_field}
-
-const struct evmcs_field vmcs_field_to_evmcs_1[] = {
- /* 64 bit rw */
- EVMCS1_FIELD(GUEST_RIP, guest_rip,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(GUEST_RSP, guest_rsp,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
- EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
- EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_CR0, host_cr0,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_CR3, host_cr3,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_CR4, host_cr4,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_RIP, host_rip,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
- EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
- EVMCS1_FIELD(MSR_BITMAP, msr_bitmap,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP),
- EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(TSC_OFFSET, tsc_offset,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
- EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
- EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(GUEST_CR0, guest_cr0,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(GUEST_CR3, guest_cr3,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(GUEST_CR4, guest_cr4,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(GUEST_DR7, guest_dr7,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
- EVMCS1_FIELD(HOST_FS_BASE, host_fs_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
- EVMCS1_FIELD(HOST_GS_BASE, host_gs_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
- EVMCS1_FIELD(HOST_TR_BASE, host_tr_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
- EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
- EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
- EVMCS1_FIELD(HOST_RSP, host_rsp,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
- EVMCS1_FIELD(EPT_POINTER, ept_pointer,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
- EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
- EVMCS1_FIELD(ENCLS_EXITING_BITMAP, encls_exiting_bitmap,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
- EVMCS1_FIELD(TSC_MULTIPLIER, tsc_multiplier,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
- /*
- * Not used by KVM:
- *
- * EVMCS1_FIELD(0x00006828, guest_ia32_s_cet,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- * EVMCS1_FIELD(0x0000682A, guest_ssp,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
- * EVMCS1_FIELD(0x0000682C, guest_ia32_int_ssp_table_addr,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- * EVMCS1_FIELD(0x00002816, guest_ia32_lbr_ctl,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- * EVMCS1_FIELD(0x00006C18, host_ia32_s_cet,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- * EVMCS1_FIELD(0x00006C1A, host_ssp,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- * EVMCS1_FIELD(0x00006C1C, host_ia32_int_ssp_table_addr,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- */
-
- /* 64 bit read only */
- EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- /*
- * Not defined in KVM:
- *
- * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
- * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
- * EVMCS1_FIELD(0x00006406, exit_io_instruction_esi,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
- * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip,
- * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
- */
- EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
-
- /*
- * No mask defined in the spec as Hyper-V doesn't currently support
- * these. Future proof by resetting the whole clean field mask on
- * access.
- */
- EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
-
- /* 32 bit rw */
- EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
- EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC),
- EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN),
- EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY),
- EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
- EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE,
- vm_entry_exception_error_code,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
- EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
- EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
- EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
- EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
- EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
- EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
-
- /* 32 bit read only */
- EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
- EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
-
- /* No mask defined in the spec (not used) */
- EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
- EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
-
- /* 16 bit rw */
- EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
- EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
- EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id,
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
-};
-const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
-
u64 nested_get_evmptr(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
@@ -608,40 +195,6 @@ int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
return 0;
}
-#if IS_ENABLED(CONFIG_HYPERV)
-DEFINE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
-
-/*
- * KVM on Hyper-V always uses the latest known eVMCSv1 revision, the assumption
- * is: in case a feature has corresponding fields in eVMCS described and it was
- * exposed in VMX feature MSRs, KVM is free to use it. Warn if KVM meets a
- * feature which has no corresponding eVMCS field, this likely means that KVM
- * needs to be updated.
- */
-#define evmcs_check_vmcs_conf(field, ctrl) \
- do { \
- typeof(vmcs_conf->field) unsupported; \
- \
- unsupported = vmcs_conf->field & ~EVMCS1_SUPPORTED_ ## ctrl; \
- if (unsupported) { \
- pr_warn_once(#field " unsupported with eVMCS: 0x%llx\n",\
- (u64)unsupported); \
- vmcs_conf->field &= EVMCS1_SUPPORTED_ ## ctrl; \
- } \
- } \
- while (0)
-
-void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
-{
- evmcs_check_vmcs_conf(cpu_based_exec_ctrl, EXEC_CTRL);
- evmcs_check_vmcs_conf(pin_based_exec_ctrl, PINCTRL);
- evmcs_check_vmcs_conf(cpu_based_2nd_exec_ctrl, 2NDEXEC);
- evmcs_check_vmcs_conf(cpu_based_3rd_exec_ctrl, 3RDEXEC);
- evmcs_check_vmcs_conf(vmentry_ctrl, VMENTRY_CTRL);
- evmcs_check_vmcs_conf(vmexit_ctrl, VMEXIT_CTRL);
-}
-#endif
-
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
index 9623fe1651c4..a87407412615 100644
--- a/arch/x86/kvm/vmx/hyperv.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -2,199 +2,89 @@
#ifndef __KVM_X86_VMX_HYPERV_H
#define __KVM_X86_VMX_HYPERV_H
-#include <linux/jump_label.h>
-
-#include <asm/hyperv-tlfs.h>
-#include <asm/mshyperv.h>
-#include <asm/vmx.h>
-
-#include "../hyperv.h"
-
-#include "capabilities.h"
-#include "vmcs.h"
+#include <linux/kvm_host.h>
#include "vmcs12.h"
+#include "vmx.h"
-struct vmcs_config;
-
-#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
-
-#define KVM_EVMCS_VERSION 1
+#define EVMPTR_INVALID (-1ULL)
+#define EVMPTR_MAP_PENDING (-2ULL)
-struct evmcs_field {
- u16 offset;
- u16 clean_field;
+enum nested_evmptrld_status {
+ EVMPTRLD_DISABLED,
+ EVMPTRLD_SUCCEEDED,
+ EVMPTRLD_VMFAIL,
+ EVMPTRLD_ERROR,
};
-extern const struct evmcs_field vmcs_field_to_evmcs_1[];
-extern const unsigned int nr_evmcs_1_fields;
-
-static __always_inline int evmcs_field_offset(unsigned long field,
- u16 *clean_field)
-{
- unsigned int index = ROL16(field, 6);
- const struct evmcs_field *evmcs_field;
-
- if (unlikely(index >= nr_evmcs_1_fields))
- return -ENOENT;
-
- evmcs_field = &vmcs_field_to_evmcs_1[index];
-
- /*
- * Use offset=0 to detect holes in eVMCS. This offset belongs to
- * 'revision_id' but this field has no encoding and is supposed to
- * be accessed directly.
- */
- if (unlikely(!evmcs_field->offset))
- return -ENOENT;
-
- if (clean_field)
- *clean_field = evmcs_field->clean_field;
-
- return evmcs_field->offset;
-}
-
-static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
- unsigned long field, u16 offset)
+#ifdef CONFIG_KVM_HYPERV
+static inline bool evmptr_is_valid(u64 evmptr)
{
- /*
- * vmcs12_read_any() doesn't care whether the supplied structure
- * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes
- * the exact offset of the required field, use it for convenience
- * here.
- */
- return vmcs12_read_any((void *)evmcs, field, offset);
+ return evmptr != EVMPTR_INVALID && evmptr != EVMPTR_MAP_PENDING;
}
-#if IS_ENABLED(CONFIG_HYPERV)
-
-DECLARE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
-
-static __always_inline bool kvm_is_using_evmcs(void)
+static inline bool nested_vmx_is_evmptr12_valid(struct vcpu_vmx *vmx)
{
- return static_branch_unlikely(&__kvm_is_using_evmcs);
+ return evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
}
-static __always_inline int get_evmcs_offset(unsigned long field,
- u16 *clean_field)
+static inline bool evmptr_is_set(u64 evmptr)
{
- int offset = evmcs_field_offset(field, clean_field);
-
- WARN_ONCE(offset < 0, "accessing unsupported EVMCS field %lx\n", field);
- return offset;
+ return evmptr != EVMPTR_INVALID;
}
-static __always_inline void evmcs_write64(unsigned long field, u64 value)
+static inline bool nested_vmx_is_evmptr12_set(struct vcpu_vmx *vmx)
{
- u16 clean_field;
- int offset = get_evmcs_offset(field, &clean_field);
-
- if (offset < 0)
- return;
-
- *(u64 *)((char *)current_evmcs + offset) = value;
-
- current_evmcs->hv_clean_fields &= ~clean_field;
+ return evmptr_is_set(vmx->nested.hv_evmcs_vmptr);
}
-static __always_inline void evmcs_write32(unsigned long field, u32 value)
+static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx)
{
- u16 clean_field;
- int offset = get_evmcs_offset(field, &clean_field);
-
- if (offset < 0)
- return;
-
- *(u32 *)((char *)current_evmcs + offset) = value;
- current_evmcs->hv_clean_fields &= ~clean_field;
+ return vmx->nested.hv_evmcs;
}
-static __always_inline void evmcs_write16(unsigned long field, u16 value)
+static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu)
{
- u16 clean_field;
- int offset = get_evmcs_offset(field, &clean_field);
-
- if (offset < 0)
- return;
-
- *(u16 *)((char *)current_evmcs + offset) = value;
- current_evmcs->hv_clean_fields &= ~clean_field;
+ /*
+ * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and
+ * eVMCS has been explicitly enabled by userspace.
+ */
+ return vcpu->arch.hyperv_enabled &&
+ to_vmx(vcpu)->nested.enlightened_vmcs_enabled;
}
-static __always_inline u64 evmcs_read64(unsigned long field)
+u64 nested_get_evmptr(struct kvm_vcpu *vcpu);
+uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
+int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
+void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
+bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu);
+void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+#else
+static inline bool evmptr_is_valid(u64 evmptr)
{
- int offset = get_evmcs_offset(field, NULL);
-
- if (offset < 0)
- return 0;
-
- return *(u64 *)((char *)current_evmcs + offset);
+ return false;
}
-static __always_inline u32 evmcs_read32(unsigned long field)
+static inline bool nested_vmx_is_evmptr12_valid(struct vcpu_vmx *vmx)
{
- int offset = get_evmcs_offset(field, NULL);
-
- if (offset < 0)
- return 0;
-
- return *(u32 *)((char *)current_evmcs + offset);
+ return false;
}
-static __always_inline u16 evmcs_read16(unsigned long field)
+static inline bool evmptr_is_set(u64 evmptr)
{
- int offset = get_evmcs_offset(field, NULL);
-
- if (offset < 0)
- return 0;
-
- return *(u16 *)((char *)current_evmcs + offset);
+ return false;
}
-static inline void evmcs_load(u64 phys_addr)
+static inline bool nested_vmx_is_evmptr12_set(struct vcpu_vmx *vmx)
{
- struct hv_vp_assist_page *vp_ap =
- hv_get_vp_assist_page(smp_processor_id());
-
- if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
- vp_ap->nested_control.features.directhypercall = 1;
- vp_ap->current_nested_vmcs = phys_addr;
- vp_ap->enlighten_vmentry = 1;
+ return false;
}
-void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
-#else /* !IS_ENABLED(CONFIG_HYPERV) */
-static __always_inline bool kvm_is_using_evmcs(void) { return false; }
-static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
-static __always_inline void evmcs_write32(unsigned long field, u32 value) {}
-static __always_inline void evmcs_write16(unsigned long field, u16 value) {}
-static __always_inline u64 evmcs_read64(unsigned long field) { return 0; }
-static __always_inline u32 evmcs_read32(unsigned long field) { return 0; }
-static __always_inline u16 evmcs_read16(unsigned long field) { return 0; }
-static inline void evmcs_load(u64 phys_addr) {}
-#endif /* IS_ENABLED(CONFIG_HYPERV) */
-
-#define EVMPTR_INVALID (-1ULL)
-#define EVMPTR_MAP_PENDING (-2ULL)
-
-static inline bool evmptr_is_valid(u64 evmptr)
+static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx)
{
- return evmptr != EVMPTR_INVALID && evmptr != EVMPTR_MAP_PENDING;
+ return NULL;
}
-
-enum nested_evmptrld_status {
- EVMPTRLD_DISABLED,
- EVMPTRLD_SUCCEEDED,
- EVMPTRLD_VMFAIL,
- EVMPTRLD_ERROR,
-};
-
-u64 nested_get_evmptr(struct kvm_vcpu *vcpu);
-uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
-int nested_enable_evmcs(struct kvm_vcpu *vcpu,
- uint16_t *vmcs_version);
-void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
-int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
-bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu);
-void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+#endif
#endif /* __KVM_X86_VMX_HYPERV_H */
diff --git a/arch/x86/kvm/vmx/hyperv_evmcs.c b/arch/x86/kvm/vmx/hyperv_evmcs.c
new file mode 100644
index 000000000000..904bfcd1519b
--- /dev/null
+++ b/arch/x86/kvm/vmx/hyperv_evmcs.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains common code for working with Enlightened VMCS which is
+ * used both by Hyper-V on KVM and KVM on Hyper-V.
+ */
+
+#include "hyperv_evmcs.h"
+
+#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
+#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
+ {EVMCS1_OFFSET(name), clean_field}
+
+const struct evmcs_field vmcs_field_to_evmcs_1[] = {
+ /* 64 bit rw */
+ EVMCS1_FIELD(GUEST_RIP, guest_rip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(GUEST_RSP, guest_rsp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR0, host_cr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR3, host_cr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR4, host_cr4,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_RIP, host_rip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+ EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+ EVMCS1_FIELD(MSR_BITMAP, msr_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP),
+ EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(TSC_OFFSET, tsc_offset,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR0, guest_cr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR3, guest_cr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR4, guest_cr4,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_DR7, guest_dr7,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(HOST_FS_BASE, host_fs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_GS_BASE, host_gs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_TR_BASE, host_tr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_RSP, host_rsp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(EPT_POINTER, ept_pointer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+ EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(ENCLS_EXITING_BITMAP, encls_exiting_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(TSC_MULTIPLIER, tsc_multiplier,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ /*
+ * Not used by KVM:
+ *
+ * EVMCS1_FIELD(0x00006828, guest_ia32_s_cet,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ * EVMCS1_FIELD(0x0000682A, guest_ssp,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ * EVMCS1_FIELD(0x0000682C, guest_ia32_int_ssp_table_addr,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ * EVMCS1_FIELD(0x00002816, guest_ia32_lbr_ctl,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ * EVMCS1_FIELD(0x00006C18, host_ia32_s_cet,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ * EVMCS1_FIELD(0x00006C1A, host_ssp,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ * EVMCS1_FIELD(0x00006C1C, host_ia32_int_ssp_table_addr,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ */
+
+ /* 64 bit read only */
+ EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ /*
+ * Not defined in KVM:
+ *
+ * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006406, exit_io_instruction_edi,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ */
+ EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+ /*
+ * No mask defined in the spec as Hyper-V doesn't currently support
+ * these. Future proof by resetting the whole clean field mask on
+ * access.
+ */
+ EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+ /* 32 bit rw */
+ EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC),
+ EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN),
+ EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY),
+ EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ vm_entry_exception_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+
+ /* 32 bit read only */
+ EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+ /* No mask defined in the spec (not used) */
+ EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+ /* 16 bit rw */
+ EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+};
+const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
diff --git a/arch/x86/kvm/vmx/hyperv_evmcs.h b/arch/x86/kvm/vmx/hyperv_evmcs.h
new file mode 100644
index 000000000000..a543fccfc574
--- /dev/null
+++ b/arch/x86/kvm/vmx/hyperv_evmcs.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file contains common definitions for working with Enlightened VMCS which
+ * are used both by Hyper-V on KVM and KVM on Hyper-V.
+ */
+#ifndef __KVM_X86_VMX_HYPERV_EVMCS_H
+#define __KVM_X86_VMX_HYPERV_EVMCS_H
+
+#include <asm/hyperv-tlfs.h>
+
+#include "capabilities.h"
+#include "vmcs12.h"
+
+#define KVM_EVMCS_VERSION 1
+
+/*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ * POSTED_INTR_NV = 0x00000002,
+ * GUEST_INTR_STATUS = 0x00000810,
+ * APIC_ACCESS_ADDR = 0x00002014,
+ * POSTED_INTR_DESC_ADDR = 0x00002016,
+ * EOI_EXIT_BITMAP0 = 0x0000201c,
+ * EOI_EXIT_BITMAP1 = 0x0000201e,
+ * EOI_EXIT_BITMAP2 = 0x00002020,
+ * EOI_EXIT_BITMAP3 = 0x00002022,
+ * GUEST_PML_INDEX = 0x00000812,
+ * PML_ADDRESS = 0x0000200e,
+ * VM_FUNCTION_CONTROL = 0x00002018,
+ * EPTP_LIST_ADDRESS = 0x00002024,
+ * VMREAD_BITMAP = 0x00002026,
+ * VMWRITE_BITMAP = 0x00002028,
+ *
+ * TSC_MULTIPLIER = 0x00002032,
+ * PLE_GAP = 0x00004020,
+ * PLE_WINDOW = 0x00004022,
+ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ *
+ * Currently unsupported in KVM:
+ * GUEST_IA32_RTIT_CTL = 0x00002814,
+ */
+#define EVMCS1_SUPPORTED_PINCTRL \
+ (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
+ PIN_BASED_EXT_INTR_MASK | \
+ PIN_BASED_NMI_EXITING | \
+ PIN_BASED_VIRTUAL_NMIS)
+
+#define EVMCS1_SUPPORTED_EXEC_CTRL \
+ (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \
+ CPU_BASED_HLT_EXITING | \
+ CPU_BASED_CR3_LOAD_EXITING | \
+ CPU_BASED_CR3_STORE_EXITING | \
+ CPU_BASED_UNCOND_IO_EXITING | \
+ CPU_BASED_MOV_DR_EXITING | \
+ CPU_BASED_USE_TSC_OFFSETTING | \
+ CPU_BASED_MWAIT_EXITING | \
+ CPU_BASED_MONITOR_EXITING | \
+ CPU_BASED_INVLPG_EXITING | \
+ CPU_BASED_RDPMC_EXITING | \
+ CPU_BASED_INTR_WINDOW_EXITING | \
+ CPU_BASED_CR8_LOAD_EXITING | \
+ CPU_BASED_CR8_STORE_EXITING | \
+ CPU_BASED_RDTSC_EXITING | \
+ CPU_BASED_TPR_SHADOW | \
+ CPU_BASED_USE_IO_BITMAPS | \
+ CPU_BASED_MONITOR_TRAP_FLAG | \
+ CPU_BASED_USE_MSR_BITMAPS | \
+ CPU_BASED_NMI_WINDOW_EXITING | \
+ CPU_BASED_PAUSE_EXITING | \
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+
+#define EVMCS1_SUPPORTED_2NDEXEC \
+ (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \
+ SECONDARY_EXEC_WBINVD_EXITING | \
+ SECONDARY_EXEC_ENABLE_VPID | \
+ SECONDARY_EXEC_ENABLE_EPT | \
+ SECONDARY_EXEC_UNRESTRICTED_GUEST | \
+ SECONDARY_EXEC_DESC | \
+ SECONDARY_EXEC_ENABLE_RDTSCP | \
+ SECONDARY_EXEC_ENABLE_INVPCID | \
+ SECONDARY_EXEC_ENABLE_XSAVES | \
+ SECONDARY_EXEC_RDSEED_EXITING | \
+ SECONDARY_EXEC_RDRAND_EXITING | \
+ SECONDARY_EXEC_TSC_SCALING | \
+ SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \
+ SECONDARY_EXEC_PT_USE_GPA | \
+ SECONDARY_EXEC_PT_CONCEAL_VMX | \
+ SECONDARY_EXEC_BUS_LOCK_DETECTION | \
+ SECONDARY_EXEC_NOTIFY_VM_EXITING | \
+ SECONDARY_EXEC_ENCLS_EXITING)
+
+#define EVMCS1_SUPPORTED_3RDEXEC (0ULL)
+
+#define EVMCS1_SUPPORTED_VMEXIT_CTRL \
+ (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \
+ VM_EXIT_SAVE_DEBUG_CONTROLS | \
+ VM_EXIT_ACK_INTR_ON_EXIT | \
+ VM_EXIT_HOST_ADDR_SPACE_SIZE | \
+ VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_EXIT_SAVE_IA32_PAT | \
+ VM_EXIT_LOAD_IA32_PAT | \
+ VM_EXIT_SAVE_IA32_EFER | \
+ VM_EXIT_LOAD_IA32_EFER | \
+ VM_EXIT_CLEAR_BNDCFGS | \
+ VM_EXIT_PT_CONCEAL_PIP | \
+ VM_EXIT_CLEAR_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMENTRY_CTRL \
+ (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \
+ VM_ENTRY_LOAD_DEBUG_CONTROLS | \
+ VM_ENTRY_IA32E_MODE | \
+ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_ENTRY_LOAD_IA32_PAT | \
+ VM_ENTRY_LOAD_IA32_EFER | \
+ VM_ENTRY_LOAD_BNDCFGS | \
+ VM_ENTRY_PT_CONCEAL_PIP | \
+ VM_ENTRY_LOAD_IA32_RTIT_CTL)
+
+#define EVMCS1_SUPPORTED_VMFUNC (0)
+
+struct evmcs_field {
+ u16 offset;
+ u16 clean_field;
+};
+
+extern const struct evmcs_field vmcs_field_to_evmcs_1[];
+extern const unsigned int nr_evmcs_1_fields;
+
+static __always_inline int evmcs_field_offset(unsigned long field,
+ u16 *clean_field)
+{
+ const struct evmcs_field *evmcs_field;
+ unsigned int index = ROL16(field, 6);
+
+ if (unlikely(index >= nr_evmcs_1_fields))
+ return -ENOENT;
+
+ evmcs_field = &vmcs_field_to_evmcs_1[index];
+
+ /*
+ * Use offset=0 to detect holes in eVMCS. This offset belongs to
+ * 'revision_id' but this field has no encoding and is supposed to
+ * be accessed directly.
+ */
+ if (unlikely(!evmcs_field->offset))
+ return -ENOENT;
+
+ if (clean_field)
+ *clean_field = evmcs_field->clean_field;
+
+ return evmcs_field->offset;
+}
+
+static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
+ unsigned long field, u16 offset)
+{
+ /*
+ * vmcs12_read_any() doesn't care whether the supplied structure
+ * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes
+ * the exact offset of the required field, use it for convenience
+ * here.
+ */
+ return vmcs12_read_any((void *)evmcs, field, offset);
+}
+
+#endif /* __KVM_X86_VMX_HYPERV_EVMCS_H */
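The EVMCS1_FIELD() initializer above keys vmcs_field_to_evmcs_1[] by ROL16(encoding, 6), so evmcs_field_offset() can turn a VMCS field encoding into a byte offset within struct hv_enlightened_vmcs with a single table lookup, with offset 0 marking holes. A minimal standalone sketch of that lookup pattern (the table, encodings and offsets below are made up, not the kernel's definitions):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define ROL16(val, n) ((uint16_t)(((uint16_t)(val) << (n)) | ((uint16_t)(val) >> (16 - (n)))))

struct evmcs_field {
	uint16_t offset;      /* byte offset into the eVMCS; 0 marks a hole */
	uint16_t clean_field; /* clean-fields bit(s) to clear on write */
};

/* One made-up encoding (0x681e) mapped to a made-up offset; everything else is a hole. */
static const struct evmcs_field field_table[ROL16(0x681e, 6) + 1] = {
	[ROL16(0x681e, 6)] = { .offset = 0x98, .clean_field = 1u << 1 },
};

static int field_offset(unsigned long field, uint16_t *clean)
{
	unsigned int index = ROL16(field, 6);

	if (index >= sizeof(field_table) / sizeof(field_table[0]))
		return -ENOENT;
	if (!field_table[index].offset)		/* offset 0 == unsupported field */
		return -ENOENT;
	if (clean)
		*clean = field_table[index].clean_field;
	return field_table[index].offset;
}

int main(void)
{
	printf("0x681e -> %d\n", field_offset(0x681e, NULL)); /* 152 (0x98) */
	printf("0x0800 -> %d\n", field_offset(0x0800, NULL)); /* -ENOENT */
	return 0;
}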
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 65826fe23f33..6329a306856b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -179,7 +179,7 @@ static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
* VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows' all
* fields and thus must be synced.
*/
- if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
+ if (nested_vmx_is_evmptr12_set(to_vmx(vcpu)))
to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;
return kvm_skip_emulated_instruction(vcpu);
@@ -194,7 +194,7 @@ static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
* can't be done if there isn't a current VMCS.
*/
if (vmx->nested.current_vmptr == INVALID_GPA &&
- !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ !nested_vmx_is_evmptr12_valid(vmx))
return nested_vmx_failInvalid(vcpu);
return nested_vmx_failValid(vcpu, vm_instruction_error);
@@ -226,10 +226,11 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
{
+#ifdef CONFIG_KVM_HYPERV
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ if (nested_vmx_is_evmptr12_valid(vmx)) {
kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
vmx->nested.hv_evmcs = NULL;
}
@@ -241,6 +242,34 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
hv_vcpu->nested.vm_id = 0;
hv_vcpu->nested.vp_id = 0;
}
+#endif
+}
+
+static bool nested_evmcs_handle_vmclear(struct kvm_vcpu *vcpu, gpa_t vmptr)
+{
+#ifdef CONFIG_KVM_HYPERV
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /*
+ * When Enlightened VMEntry is enabled on the calling CPU we treat
+ * memory area pointed to by vmptr as Enlightened VMCS (as there's no good
+ * way to distinguish it from VMCS12) and we must not corrupt it by
+ * writing to the non-existent 'launch_state' field. The area doesn't
+ * have to be the currently active EVMCS on the calling CPU and there's
+ * nothing KVM has to do to transition it from 'active' to 'non-active'
+ * state. It is possible that the area will stay mapped as
+ * vmx->nested.hv_evmcs but this shouldn't be a problem.
+ */
+ if (!guest_cpuid_has_evmcs(vcpu) ||
+ !evmptr_is_valid(nested_get_evmptr(vcpu)))
+ return false;
+
+ if (nested_vmx_evmcs(vmx) && vmptr == vmx->nested.hv_evmcs_vmptr)
+ nested_release_evmcs(vcpu);
+
+ return true;
+#else
+ return false;
+#endif
}
static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
@@ -572,7 +601,6 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
int msr;
unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
- struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
/* Nothing to do if the MSR bitmap is not in use. */
@@ -588,10 +616,13 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
* - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
* and tells KVM (L0) there were no changes in MSR bitmap for L2.
*/
- if (!vmx->nested.force_msr_bitmap_recalc && evmcs &&
- evmcs->hv_enlightenments_control.msr_bitmap &&
- evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
- return true;
+ if (!vmx->nested.force_msr_bitmap_recalc) {
+ struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
+
+ if (evmcs && evmcs->hv_enlightenments_control.msr_bitmap &&
+ evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
+ return true;
+ }
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
return false;
@@ -1085,7 +1116,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
bool nested_ept, bool reload_pdptrs,
enum vm_entry_failure_code *entry_failure_code)
{
- if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
+ if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
@@ -1139,14 +1170,8 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- /*
- * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
- * L2's VP_ID upon request from the guest. Make sure we check for
- * pending entries in the right FIFO upon L1/L2 transition as these
- * requests are put by other vCPUs asynchronously.
- */
- if (to_hv_vcpu(vcpu) && enable_ept)
- kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+ /* Handle pending Hyper-V TLB flush requests */
+ kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept);
/*
* If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
@@ -1578,8 +1603,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
{
+#ifdef CONFIG_KVM_HYPERV
struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
- struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+ struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu);
/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
@@ -1818,12 +1844,16 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields
*/
return;
+#else /* CONFIG_KVM_HYPERV */
+ KVM_BUG_ON(1, vmx->vcpu.kvm);
+#endif /* CONFIG_KVM_HYPERV */
}
static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
{
+#ifdef CONFIG_KVM_HYPERV
struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
- struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+ struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
/*
* Should not be changed by KVM:
@@ -1992,6 +2022,9 @@ static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
return;
+#else /* CONFIG_KVM_HYPERV */
+ KVM_BUG_ON(1, vmx->vcpu.kvm);
+#endif /* CONFIG_KVM_HYPERV */
}
/*
@@ -2001,6 +2034,7 @@ static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
struct kvm_vcpu *vcpu, bool from_launch)
{
+#ifdef CONFIG_KVM_HYPERV
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool evmcs_gpa_changed = false;
u64 evmcs_gpa;
@@ -2082,13 +2116,16 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
}
return EVMPTRLD_SUCCEEDED;
+#else
+ return EVMPTRLD_DISABLED;
+#endif
}
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ if (nested_vmx_is_evmptr12_valid(vmx))
copy_vmcs12_to_enlightened(vmx);
else
copy_vmcs12_to_shadow(vmx);
@@ -2242,7 +2279,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
u32 exec_control;
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
- if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ if (vmx->nested.dirty_vmcs12 || nested_vmx_is_evmptr12_valid(vmx))
prepare_vmcs02_early_rare(vmx, vmcs12);
/*
@@ -2403,7 +2440,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
- struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+ struct hv_enlightened_vmcs *hv_evmcs = nested_vmx_evmcs(vmx);
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
@@ -2535,15 +2572,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
enum vm_entry_failure_code *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
bool load_guest_pdptrs_vmcs12 = false;
- if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ if (vmx->nested.dirty_vmcs12 || nested_vmx_is_evmptr12_valid(vmx)) {
prepare_vmcs02_rare(vmx, vmcs12);
vmx->nested.dirty_vmcs12 = false;
- load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
- !(vmx->nested.hv_evmcs->hv_clean_fields &
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
+ load_guest_pdptrs_vmcs12 = !nested_vmx_is_evmptr12_valid(vmx) ||
+ !(evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
}
if (vmx->nested.nested_run_pending &&
@@ -2664,9 +2701,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* bits when it changes a field in eVMCS. Mark all fields as clean
* here.
*/
- if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
- vmx->nested.hv_evmcs->hv_clean_fields |=
- HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ if (nested_vmx_is_evmptr12_valid(vmx))
+ evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
return 0;
}
@@ -2717,7 +2753,7 @@ static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
}
/* Reserved bits should not be set */
- if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
+ if (CC(!kvm_vcpu_is_legal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
return false;
/* AD, if set, should be supported */
@@ -2888,8 +2924,10 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
nested_check_vm_entry_controls(vcpu, vmcs12))
return -EINVAL;
+#ifdef CONFIG_KVM_HYPERV
if (guest_cpuid_has_evmcs(vcpu))
return nested_evmcs_check_controls(vmcs12);
+#endif
return 0;
}
@@ -2912,7 +2950,7 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
- CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
+ CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3)))
return -EINVAL;
if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
@@ -3161,6 +3199,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
return 0;
}
+#ifdef CONFIG_KVM_HYPERV
static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3188,6 +3227,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
return true;
}
+#endif
static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
@@ -3279,6 +3319,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
+#ifdef CONFIG_KVM_HYPERV
/*
* Note: nested_get_evmcs_page() also updates 'vp_assist_page' copy
* in 'struct kvm_vcpu_hv' in case eVMCS is in use, this is mandatory
@@ -3295,6 +3336,7 @@ static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
return false;
}
+#endif
if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
return false;
@@ -3538,7 +3580,7 @@ vmentry_fail_vmexit:
load_vmcs12_host_state(vcpu, vmcs12);
vmcs12->vm_exit_reason = exit_reason.full;
- if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ if (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx))
vmx->nested.need_vmcs12_to_shadow_sync = true;
return NVMX_VMENTRY_VMEXIT;
}
@@ -3569,7 +3611,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
return nested_vmx_failInvalid(vcpu);
- if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
+ if (CC(!nested_vmx_is_evmptr12_valid(vmx) &&
vmx->nested.current_vmptr == INVALID_GPA))
return nested_vmx_failInvalid(vcpu);
@@ -3584,8 +3626,10 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (CC(vmcs12->hdr.shadow_vmcs))
return nested_vmx_failInvalid(vcpu);
- if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
- copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
+ if (nested_vmx_is_evmptr12_valid(vmx)) {
+ struct hv_enlightened_vmcs *evmcs = nested_vmx_evmcs(vmx);
+
+ copy_enlightened_to_vmcs12(vmx, evmcs->hv_clean_fields);
/* Enlightened VMCS doesn't have launch state */
vmcs12->launch_state = !launch;
} else if (enable_shadow_vmcs) {
@@ -4329,11 +4373,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ if (nested_vmx_is_evmptr12_valid(vmx))
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
- !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
+ !nested_vmx_is_evmptr12_valid(vmx);
vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
@@ -4732,6 +4776,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
+#ifdef CONFIG_KVM_HYPERV
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
/*
* KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
@@ -4741,6 +4786,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
*/
(void)nested_get_evmcs_page(vcpu);
}
+#endif
/* Service pending TLB flush requests for L2 before switching to L1. */
kvm_service_local_tlb_flush_requests(vcpu);
@@ -4854,7 +4900,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
}
if ((vm_exit_reason != -1) &&
- (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
+ (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
/* in case we halted in L2 */
@@ -4980,6 +5026,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
else
*ret = off;
+ *ret = vmx_get_untagged_addr(vcpu, *ret, 0);
/* Long mode: #GP(0)/#SS(0) if the memory address is in a
* non-canonical form. This is the only check on the memory
* destination for long mode!
@@ -5292,18 +5339,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (vmptr == vmx->nested.vmxon_ptr)
return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
- /*
- * When Enlightened VMEntry is enabled on the calling CPU we treat
- * memory area pointer by vmptr as Enlightened VMCS (as there's no good
- * way to distinguish it from VMCS12) and we must not corrupt it by
- * writing to the non-existent 'launch_state' field. The area doesn't
- * have to be the currently active EVMCS on the calling CPU and there's
- * nothing KVM has to do to transition it from 'active' to 'non-active'
- * state. It is possible that the area will stay mapped as
- * vmx->nested.hv_evmcs but this shouldn't be a problem.
- */
- if (likely(!guest_cpuid_has_evmcs(vcpu) ||
- !evmptr_is_valid(nested_get_evmptr(vcpu)))) {
+ if (likely(!nested_evmcs_handle_vmclear(vcpu, vmptr))) {
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vcpu);
@@ -5320,8 +5356,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
vmptr + offsetof(struct vmcs12,
launch_state),
&zero, sizeof(zero));
- } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
- nested_release_evmcs(vcpu);
}
return nested_vmx_succeed(vcpu);
@@ -5360,7 +5394,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
/* Decode instruction info and find the field to read */
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
- if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ if (!nested_vmx_is_evmptr12_valid(vmx)) {
/*
* In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
* any VMREAD sets the ALU flags for VMfailInvalid.
@@ -5398,7 +5432,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/* Read the field, zero-extended to a u64 value */
- value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset);
+ value = evmcs_read_any(nested_vmx_evmcs(vmx), field, offset);
}
/*
@@ -5586,7 +5620,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
/* Forbid normal VMPTRLD if Enlightened version was used */
- if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ if (nested_vmx_is_evmptr12_valid(vmx))
return 1;
if (vmx->nested.current_vmptr != vmptr) {
@@ -5649,7 +5683,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
+ if (unlikely(nested_vmx_is_evmptr12_valid(to_vmx(vcpu))))
return 1;
if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
@@ -5797,6 +5831,10 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vpid02 = nested_get_vpid02(vcpu);
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+ /*
+ * LAM doesn't apply to addresses that are inputs to TLB
+ * invalidation.
+ */
if (!operand.vpid ||
is_noncanonical_address(operand.gla, vcpu))
return nested_vmx_fail(vcpu,
@@ -6208,11 +6246,13 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
* Handle L2's bus locks in L0 directly.
*/
return true;
+#ifdef CONFIG_KVM_HYPERV
case EXIT_REASON_VMCALL:
/* Hyper-V L2 TLB flush hypercall is handled by L0 */
return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
nested_evmcs_l2_tlb_flush_enabled(vcpu) &&
kvm_hv_is_tlb_flush_hcall(vcpu);
+#endif
default:
break;
}
@@ -6435,7 +6475,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
/* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
- if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
+ if (nested_vmx_is_evmptr12_set(vmx))
kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
if (is_guest_mode(vcpu) &&
@@ -6491,7 +6531,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
} else {
copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
if (!vmx->nested.need_vmcs12_to_shadow_sync) {
- if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+ if (nested_vmx_is_evmptr12_valid(vmx))
/*
* L1 hypervisor is not obliged to keep eVMCS
* clean fields data always up-to-date while
@@ -6632,6 +6672,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
+#ifdef CONFIG_KVM_HYPERV
} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
/*
* nested_vmx_handle_enlightened_vmptrld() cannot be called
@@ -6641,6 +6682,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
*/
vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+#endif
} else {
return -EINVAL;
}
@@ -7096,7 +7138,9 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
.set_state = vmx_set_nested_state,
.get_nested_state_pages = vmx_get_nested_state_pages,
.write_log_dirty = nested_vmx_write_pml_buffer,
+#ifdef CONFIG_KVM_HYPERV
.enable_evmcs = nested_enable_evmcs,
.get_evmcs_version = nested_get_evmcs_version,
.hv_inject_synthetic_vmexit_post_tlb_flush = vmx_hv_inject_synthetic_vmexit_post_tlb_flush,
+#endif
};
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index b4b9d51438c6..cce4e2aa30fb 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -3,6 +3,7 @@
#define __KVM_X86_VMX_NESTED_H
#include "kvm_cache_regs.h"
+#include "hyperv.h"
#include "vmcs12.h"
#include "vmx.h"
@@ -57,7 +58,7 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
/* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
return vmx->nested.current_vmptr != -1ull ||
- vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID;
+ nested_vmx_is_evmptr12_set(vmx);
}
static inline u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 820d3e1f6b4f..a6216c874729 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -437,11 +437,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc_write_counter(pmc, data);
- pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc_write_counter(pmc, data);
- pmc_update_sample_period(pmc);
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
reserved_bits = pmu->reserved_bits;
@@ -632,26 +630,6 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- struct kvm_pmc *pmc = NULL;
- int i;
-
- for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
- pmc = &pmu->gp_counters[i];
-
- pmc_stop_counter(pmc);
- pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
- }
-
- for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
- pmc = &pmu->fixed_counters[i];
-
- pmc_stop_counter(pmc);
- pmc->counter = pmc->prev_counter = 0;
- }
-
- pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
-
intel_pmu_release_guest_lbr_event(vcpu);
}
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 3e822e582497..6fef01e0536e 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -37,6 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
if (!IS_ALIGNED(*gva, alignment)) {
fault = true;
} else if (likely(is_64_bit_mode(vcpu))) {
+ *gva = vmx_get_untagged_addr(vcpu, *gva, 0);
fault = is_noncanonical_address(*gva, vcpu);
} else {
*gva &= 0xffffffff;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index be275a0410a8..906ecd001511 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -289,7 +289,7 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
RET
.Lfixup:
- cmpb $0, kvm_rebooting
+ cmpb $0, _ASM_RIP(kvm_rebooting)
jne .Lvmfail
ud2
.Lvmfail:
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e0f86f11c345..e262bc2ba4e5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -66,6 +66,7 @@
#include "vmx.h"
#include "x86.h"
#include "smm.h"
+#include "vmx_onhyperv.h"
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -523,22 +524,14 @@ module_param(enlightened_vmcs, bool, 0444);
static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
{
struct hv_enlightened_vmcs *evmcs;
- struct hv_partition_assist_pg **p_hv_pa_pg =
- &to_kvm_hv(vcpu->kvm)->hv_pa_pg;
- /*
- * Synthetic VM-Exit is not enabled in current code and so All
- * evmcs in singe VM shares same assist page.
- */
- if (!*p_hv_pa_pg)
- *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
+ hpa_t partition_assist_page = hv_get_partition_assist_page(vcpu);
- if (!*p_hv_pa_pg)
+ if (partition_assist_page == INVALID_PAGE)
return -ENOMEM;
evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
- evmcs->partition_assist_page =
- __pa(*p_hv_pa_pg);
+ evmcs->partition_assist_page = partition_assist_page;
evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
@@ -2055,6 +2048,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data))
return 1;
+#ifdef CONFIG_KVM_HYPERV
/*
* Enlightened VMCS v1 doesn't have certain VMCS fields but
* instead of just ignoring the features, different Hyper-V
@@ -2065,6 +2059,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu))
nested_evmcs_filter_control_msr(vcpu, msr_info->index,
&msr_info->data);
+#endif
break;
case MSR_IA32_RTIT_CTL:
if (!vmx_pt_mode_is_host_guest())
@@ -3400,7 +3395,8 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
update_guest_cr3 = false;
vmx_ept_load_pdptrs(vcpu);
} else {
- guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu);
+ guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu) |
+ kvm_get_active_cr3_lam_bits(vcpu);
}
if (update_guest_cr3)
@@ -4833,7 +4829,10 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->nested.posted_intr_nv = -1;
vmx->nested.vmxon_ptr = INVALID_GPA;
vmx->nested.current_vmptr = INVALID_GPA;
+
+#ifdef CONFIG_KVM_HYPERV
vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
+#endif
vcpu->arch.microcode_version = 0x100000000ULL;
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
@@ -5782,7 +5781,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* would also use advanced VM-exit information for EPT violations to
* reconstruct the page fault error code.
*/
- if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
+ if (unlikely(allow_smaller_maxphyaddr && !kvm_vcpu_is_legal_gpa(vcpu, gpa)))
return kvm_emulate_instruction(vcpu, 0);
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -6757,10 +6756,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
return;
/*
- * Grab the memslot so that the hva lookup for the mmu_notifier retry
- * is guaranteed to use the same memslot as the pfn lookup, i.e. rely
- * on the pfn lookup's validation of the memslot to ensure a valid hva
- * is used for the retry check.
+ * Explicitly grab the memslot using KVM's internal slot ID to ensure
+ * KVM doesn't unintentionally grab a userspace memslot. It _should_
+ * be impossible for userspace to create a memslot for the APIC when
+ * APICv is enabled, but paranoia won't hurt in this case.
*/
slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
@@ -6785,8 +6784,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
return;
read_lock(&vcpu->kvm->mmu_lock);
- if (mmu_invalidate_retry_hva(kvm, mmu_seq,
- gfn_to_hva_memslot(slot, gfn))) {
+ if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
read_unlock(&vcpu->kvm->mmu_lock);
goto out;
@@ -7674,6 +7672,9 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));
+ entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
+ cr4_fixed1_update(X86_CR4_LAM_SUP, eax, feature_bit(LAM));
+
#undef cr4_fixed1_update
}
@@ -7760,6 +7761,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LAM);
vmx_setup_uret_msrs(vmx);
@@ -8206,6 +8208,50 @@ static void vmx_vm_destroy(struct kvm *kvm)
free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm));
}
+/*
+ * Note, the SDM states that the linear address is masked *after* the modified
+ * canonicality check, whereas KVM masks (untags) the address and then performs
+ * a "normal" canonicality check. Functionally, the two methods are identical,
+ * and when the masking occurs relative to the canonicality check isn't visible
+ * to software, i.e. KVM's behavior doesn't violate the SDM.
+ */
+gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags)
+{
+ int lam_bit;
+ unsigned long cr3_bits;
+
+ if (flags & (X86EMUL_F_FETCH | X86EMUL_F_IMPLICIT | X86EMUL_F_INVLPG))
+ return gva;
+
+ if (!is_64_bit_mode(vcpu))
+ return gva;
+
+ /*
+ * Bit 63 determines if the address should be treated as user address
+ * or a supervisor address.
+ */
+ if (!(gva & BIT_ULL(63))) {
+ cr3_bits = kvm_get_active_cr3_lam_bits(vcpu);
+ if (!(cr3_bits & (X86_CR3_LAM_U57 | X86_CR3_LAM_U48)))
+ return gva;
+
+ /* LAM_U48 is ignored if LAM_U57 is set. */
+ lam_bit = cr3_bits & X86_CR3_LAM_U57 ? 56 : 47;
+ } else {
+ if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_LAM_SUP))
+ return gva;
+
+ lam_bit = kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 56 : 47;
+ }
+
+ /*
+ * Untag the address by sign-extending the lam_bit, but NOT to bit 63.
+ * Bit 63 is retained from the raw virtual address so that untagging
+ * doesn't change a user access to a supervisor access, and vice versa.
+ */
+ return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63));
+}
+
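A hedged, standalone illustration of the untagging arithmetic in vmx_get_untagged_addr(): with LAM_U57 active (lam_bit 56), metadata in bits 62:57 of a user pointer is replaced by sign-extension from bit 56 while bit 63 is kept from the raw address. The helper below reimplements sign_extend64() locally and uses a made-up pointer value:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n) (1ULL << (n))

/* Local equivalent of the kernel's sign_extend64(): sign-extend from bit 'index'. */
static inline int64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;

	return (int64_t)(value << shift) >> shift;
}

static uint64_t untag(uint64_t gva, int lam_bit)
{
	/* Keep bit 63 from the raw address so user/supervisor polarity is preserved. */
	return ((uint64_t)sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63));
}

int main(void)
{
	/* User pointer with metadata in bits 62:57 (LAM_U57 -> lam_bit 56). */
	uint64_t tagged = 0x3e00123456789abcULL;

	printf("tagged   %#018llx\n", (unsigned long long)tagged);
	printf("untagged %#018llx\n", (unsigned long long)untag(tagged, 56));
	/* Expected: untagged 0x0000123456789abc */
	return 0;
}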
static struct kvm_x86_ops vmx_x86_ops __initdata = {
.name = KBUILD_MODNAME,
@@ -8346,6 +8392,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.complete_emulated_msr = kvm_complete_insn_gp,
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+
+ .get_untagged_addr = vmx_get_untagged_addr,
};
static unsigned int vmx_handle_intel_pt_intr(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index c2130d2c8e24..e3b0985bb74a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -241,9 +241,11 @@ struct nested_vmx {
bool guest_mode;
} smm;
+#ifdef CONFIG_KVM_HYPERV
gpa_t hv_evmcs_vmptr;
struct kvm_host_map hv_evmcs_map;
struct hv_enlightened_vmcs *hv_evmcs;
+#endif
};
struct vcpu_vmx {
@@ -420,6 +422,8 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
+gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
+
static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
int type, bool value)
{
@@ -745,14 +749,4 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && enable_ipiv;
}
-static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu)
-{
- /*
- * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and
- * eVMCS has been explicitly enabled by userspace.
- */
- return vcpu->arch.hyperv_enabled &&
- to_vmx(vcpu)->nested.enlightened_vmcs_enabled;
-}
-
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.c b/arch/x86/kvm/vmx/vmx_onhyperv.c
new file mode 100644
index 000000000000..b9a8b91166d0
--- /dev/null
+++ b/arch/x86/kvm/vmx/vmx_onhyperv.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "capabilities.h"
+#include "vmx_onhyperv.h"
+
+DEFINE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
+/*
+ * KVM on Hyper-V always uses the latest known eVMCSv1 revision, the assumption
+ * is: in case a feature has corresponding fields in eVMCS described and it was
+ * exposed in VMX feature MSRs, KVM is free to use it. Warn if KVM meets a
+ * feature which has no corresponding eVMCS field, this likely means that KVM
+ * needs to be updated.
+ */
+#define evmcs_check_vmcs_conf(field, ctrl) \
+ do { \
+ typeof(vmcs_conf->field) unsupported; \
+ \
+ unsupported = vmcs_conf->field & ~EVMCS1_SUPPORTED_ ## ctrl; \
+ if (unsupported) { \
+ pr_warn_once(#field " unsupported with eVMCS: 0x%llx\n",\
+ (u64)unsupported); \
+ vmcs_conf->field &= EVMCS1_SUPPORTED_ ## ctrl; \
+ } \
+ } \
+ while (0)
+
+void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
+{
+ evmcs_check_vmcs_conf(cpu_based_exec_ctrl, EXEC_CTRL);
+ evmcs_check_vmcs_conf(pin_based_exec_ctrl, PINCTRL);
+ evmcs_check_vmcs_conf(cpu_based_2nd_exec_ctrl, 2NDEXEC);
+ evmcs_check_vmcs_conf(cpu_based_3rd_exec_ctrl, 3RDEXEC);
+ evmcs_check_vmcs_conf(vmentry_ctrl, VMENTRY_CTRL);
+ evmcs_check_vmcs_conf(vmexit_ctrl, VMEXIT_CTRL);
+}
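For illustration, the mask-and-warn pattern that evmcs_check_vmcs_conf() applies to each vmcs_config field can be sketched as a plain function; the control value and supported mask below are made up, not the real EVMCS1_SUPPORTED_* definitions:

#include <stdint.h>
#include <stdio.h>

#define SUPPORTED_PINCTRL 0x0000003fULL /* stand-in for EVMCS1_SUPPORTED_PINCTRL */

static uint32_t sanitize(const char *name, uint32_t ctrl, uint64_t supported)
{
	uint64_t unsupported = ctrl & ~supported;

	if (unsupported) {
		fprintf(stderr, "%s unsupported with eVMCS: 0x%llx\n",
			name, (unsigned long long)unsupported);
		ctrl &= supported;	/* drop the bits eVMCSv1 cannot express */
	}
	return ctrl;
}

int main(void)
{
	/* Bit 8 is outside the supported mask, so it is stripped with a warning. */
	uint32_t pin_ctrl = sanitize("pin_based_exec_ctrl", 0x0000010fu, SUPPORTED_PINCTRL);

	printf("sanitized pinctrl: %#x\n", pin_ctrl); /* 0xf */
	return 0;
}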
diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.h b/arch/x86/kvm/vmx/vmx_onhyperv.h
new file mode 100644
index 000000000000..eb48153bfd73
--- /dev/null
+++ b/arch/x86/kvm/vmx/vmx_onhyperv.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ARCH_X86_KVM_VMX_ONHYPERV_H__
+#define __ARCH_X86_KVM_VMX_ONHYPERV_H__
+
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+
+#include <linux/jump_label.h>
+
+#include "capabilities.h"
+#include "hyperv_evmcs.h"
+#include "vmcs12.h"
+
+#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+DECLARE_STATIC_KEY_FALSE(__kvm_is_using_evmcs);
+
+static __always_inline bool kvm_is_using_evmcs(void)
+{
+ return static_branch_unlikely(&__kvm_is_using_evmcs);
+}
+
+static __always_inline int get_evmcs_offset(unsigned long field,
+ u16 *clean_field)
+{
+ int offset = evmcs_field_offset(field, clean_field);
+
+ WARN_ONCE(offset < 0, "accessing unsupported EVMCS field %lx\n", field);
+ return offset;
+}
+
+static __always_inline void evmcs_write64(unsigned long field, u64 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u64 *)((char *)current_evmcs + offset) = value;
+
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static __always_inline void evmcs_write32(unsigned long field, u32 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u32 *)((char *)current_evmcs + offset) = value;
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static __always_inline void evmcs_write16(unsigned long field, u16 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u16 *)((char *)current_evmcs + offset) = value;
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static __always_inline u64 evmcs_read64(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u64 *)((char *)current_evmcs + offset);
+}
+
+static __always_inline u32 evmcs_read32(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u32 *)((char *)current_evmcs + offset);
+}
+
+static __always_inline u16 evmcs_read16(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u16 *)((char *)current_evmcs + offset);
+}
+
+static inline void evmcs_load(u64 phys_addr)
+{
+ struct hv_vp_assist_page *vp_ap =
+ hv_get_vp_assist_page(smp_processor_id());
+
+ if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
+ vp_ap->nested_control.features.directhypercall = 1;
+ vp_ap->current_nested_vmcs = phys_addr;
+ vp_ap->enlighten_vmentry = 1;
+}
+
+void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
+#else /* !IS_ENABLED(CONFIG_HYPERV) */
+static __always_inline bool kvm_is_using_evmcs(void) { return false; }
+static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
+static __always_inline void evmcs_write32(unsigned long field, u32 value) {}
+static __always_inline void evmcs_write16(unsigned long field, u16 value) {}
+static __always_inline u64 evmcs_read64(unsigned long field) { return 0; }
+static __always_inline u32 evmcs_read32(unsigned long field) { return 0; }
+static __always_inline u16 evmcs_read16(unsigned long field) { return 0; }
+static inline void evmcs_load(u64 phys_addr) {}
+#endif /* IS_ENABLED(CONFIG_HYPERV) */
+
+#endif /* __ARCH_X86_KVM_VMX_ONHYPERV_H__ */
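A rough sketch of the dirty tracking the evmcs_write*() helpers above perform: every field update also clears the field's group bit in hv_clean_fields, so Hyper-V knows it cannot reuse its cached copy of that group on the next enlightened VM-entry. The structure layout and bit below are mocks, not the TLFS eVMCS:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CLEAN_FIELD_GUEST_BASIC (1u << 2) /* made-up clean-field bit */

struct mock_evmcs {
	uint32_t hv_clean_fields; /* bit set == Hyper-V may reuse its cached copy */
	uint64_t guest_rsp;
};

static void evmcs_write_u64(struct mock_evmcs *evmcs, size_t offset,
			    uint64_t value, uint32_t clean_field)
{
	memcpy((char *)evmcs + offset, &value, sizeof(value));
	evmcs->hv_clean_fields &= ~clean_field; /* mark the group dirty */
}

int main(void)
{
	struct mock_evmcs evmcs = { .hv_clean_fields = ~0u };

	evmcs_write_u64(&evmcs, offsetof(struct mock_evmcs, guest_rsp),
			0xffffc90000001000ULL, CLEAN_FIELD_GUEST_BASIC);

	printf("guest_rsp    = %#llx\n", (unsigned long long)evmcs.guest_rsp);
	printf("clean fields = %#x\n", evmcs.hv_clean_fields); /* bit 2 now clear */
	return 0;
}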
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 33af7b4c6eb4..f41ce3c24123 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -6,7 +6,7 @@
#include <asm/vmx.h>
-#include "hyperv.h"
+#include "vmx_onhyperv.h"
#include "vmcs.h"
#include "../x86.h"
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cec0fc2a4b1c..363b1c080205 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1284,7 +1284,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
* stuff CR3, e.g. for RSM emulation, and there is no guarantee that
* the current vCPU mode is accurate.
*/
- if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
+ if (!kvm_vcpu_is_legal_cr3(vcpu, cr3))
return 1;
if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, cr3))
@@ -1504,6 +1504,8 @@ static unsigned num_msrs_to_save;
static const u32 emulated_msrs_all[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
+
+#ifdef CONFIG_KVM_HYPERV
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
@@ -1521,6 +1523,7 @@ static const u32 emulated_msrs_all[] = {
HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
HV_X64_MSR_SYNDBG_PENDING_BUFFER,
+#endif
MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
@@ -2510,26 +2513,29 @@ static inline int gtod_is_based_on_tsc(int mode)
}
#endif
-static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
+static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu, bool new_generation)
{
#ifdef CONFIG_X86_64
- bool vcpus_matched;
struct kvm_arch *ka = &vcpu->kvm->arch;
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
- vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
- atomic_read(&vcpu->kvm->online_vcpus));
+ /*
+ * To use the masterclock, the host clocksource must be based on TSC
+ * and all vCPUs must have matching TSCs. Note, the count for matching
+ * vCPUs doesn't include the reference vCPU, hence "+1".
+ */
+ bool use_master_clock = (ka->nr_vcpus_matched_tsc + 1 ==
+ atomic_read(&vcpu->kvm->online_vcpus)) &&
+ gtod_is_based_on_tsc(gtod->clock.vclock_mode);
/*
- * Once the masterclock is enabled, always perform request in
- * order to update it.
- *
- * In order to enable masterclock, the host clocksource must be TSC
- * and the vcpus need to have matched TSCs. When that happens,
- * perform request to enable masterclock.
+ * Request a masterclock update if the masterclock needs to be toggled
+ * on/off, or when starting a new generation and the masterclock is
+ * enabled (compute_guest_tsc() requires the masterclock snapshot to be
+ * taken _after_ the new generation is created).
*/
- if (ka->use_master_clock ||
- (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
+ if ((ka->use_master_clock && new_generation) ||
+ (ka->use_master_clock != use_master_clock))
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -2706,7 +2712,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
- kvm_track_tsc_matching(vcpu);
+ kvm_track_tsc_matching(vcpu, !matched);
}
static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
@@ -3104,7 +3110,8 @@ u64 get_kvmclock_ns(struct kvm *kvm)
static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
struct gfn_to_pfn_cache *gpc,
- unsigned int offset)
+ unsigned int offset,
+ bool force_tsc_unstable)
{
struct kvm_vcpu_arch *vcpu = &v->arch;
struct pvclock_vcpu_time_info *guest_hv_clock;
@@ -3141,6 +3148,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
}
memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
+
+ if (force_tsc_unstable)
+ guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT;
+
smp_wmb();
guest_hv_clock->version = ++vcpu->hv_clock.version;
@@ -3161,6 +3172,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
u64 tsc_timestamp, host_tsc;
u8 pvclock_flags;
bool use_master_clock;
+#ifdef CONFIG_KVM_XEN
+ /*
+ * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT because,
+ * unless explicitly told to use the TSC as its clocksource, Xen will not
+ * set this bit. This default behaviour led to bugs in some guest kernels
+ * that cause problems if they observe PVCLOCK_TSC_STABLE_BIT in the
+ * pvclock flags.
+ */
+ bool xen_pvclock_tsc_unstable =
+ ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+#endif
kernel_ns = 0;
host_tsc = 0;
@@ -3239,13 +3260,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.flags = pvclock_flags;
if (vcpu->pv_time.active)
- kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
+ kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
#ifdef CONFIG_KVM_XEN
if (vcpu->xen.vcpu_info_cache.active)
kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
- offsetof(struct compat_vcpu_info, time));
+ offsetof(struct compat_vcpu_info, time),
+ xen_pvclock_tsc_unstable);
if (vcpu->xen.vcpu_time_info_cache.active)
- kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
+ kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
+ xen_pvclock_tsc_unstable);
#endif
kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0;
@@ -4020,6 +4043,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* the need to ignore the workaround.
*/
break;
+#ifdef CONFIG_KVM_HYPERV
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
case HV_X64_MSR_SYNDBG_OPTIONS:
@@ -4032,6 +4056,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_TSC_INVARIANT_CONTROL:
return kvm_hv_set_msr_common(vcpu, msr, data,
msr_info->host_initiated);
+#endif
case MSR_IA32_BBL_CR_CTL3:
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
@@ -4377,6 +4402,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
msr_info->data = 0x20000000;
break;
+#ifdef CONFIG_KVM_HYPERV
case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
case HV_X64_MSR_SYNDBG_OPTIONS:
@@ -4390,6 +4416,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data,
msr_info->host_initiated);
+#endif
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
* silicon. It is however accessed by winxp in very narrow
@@ -4527,6 +4554,7 @@ static inline bool kvm_can_mwait_in_guest(void)
boot_cpu_has(X86_FEATURE_ARAT);
}
+#ifdef CONFIG_KVM_HYPERV
static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 __user *cpuid_arg)
{
@@ -4547,6 +4575,14 @@ static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
return 0;
}
+#endif
+
+static bool kvm_is_vm_type_supported(unsigned long type)
+{
+ return type == KVM_X86_DEFAULT_VM ||
+ (type == KVM_X86_SW_PROTECTED_VM &&
+ IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
+}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
@@ -4573,9 +4609,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_VCPU_EVENTS:
+#ifdef CONFIG_KVM_HYPERV
case KVM_CAP_HYPERV:
case KVM_CAP_HYPERV_VAPIC:
case KVM_CAP_HYPERV_SPIN:
+ case KVM_CAP_HYPERV_TIME:
case KVM_CAP_HYPERV_SYNIC:
case KVM_CAP_HYPERV_SYNIC2:
case KVM_CAP_HYPERV_VP_INDEX:
@@ -4585,6 +4623,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_CPUID:
case KVM_CAP_HYPERV_ENFORCE_CPUID:
case KVM_CAP_SYS_HYPERV_CPUID:
+#endif
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -4594,7 +4633,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
- case KVM_CAP_HYPERV_TIME:
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
case KVM_CAP_TSC_DEADLINE_TIMER:
case KVM_CAP_DISABLE_QUIRKS:
@@ -4625,6 +4663,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
case KVM_CAP_IRQFD_RESAMPLE:
+ case KVM_CAP_MEMORY_FAULT_INFO:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@@ -4638,7 +4677,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
KVM_XEN_HVM_CONFIG_SHARED_INFO |
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
- KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+ KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+ KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
@@ -4704,12 +4744,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = kvm_x86_ops.nested_ops->get_state ?
kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
break;
+#ifdef CONFIG_KVM_HYPERV
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
r = kvm_x86_ops.enable_l2_tlb_flush != NULL;
break;
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
break;
+#endif
case KVM_CAP_SMALLER_MAXPHYADDR:
r = (int) allow_smaller_maxphyaddr;
break;
@@ -4738,6 +4780,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X86_NOTIFY_VMEXIT:
r = kvm_caps.has_notify_vmexit;
break;
+ case KVM_CAP_VM_TYPES:
+ r = BIT(KVM_X86_DEFAULT_VM);
+ if (kvm_is_vm_type_supported(KVM_X86_SW_PROTECTED_VM))
+ r |= BIT(KVM_X86_SW_PROTECTED_VM);
+ break;
default:
break;
}
@@ -4871,9 +4918,11 @@ long kvm_arch_dev_ioctl(struct file *filp,
case KVM_GET_MSRS:
r = msr_io(NULL, argp, do_get_msr_feature, 1);
break;
+#ifdef CONFIG_KVM_HYPERV
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
break;
+#endif
case KVM_GET_DEVICE_ATTR: {
struct kvm_device_attr attr;
r = -EFAULT;
@@ -5699,14 +5748,11 @@ static int kvm_vcpu_ioctl_device_attr(struct kvm_vcpu *vcpu,
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
- int r;
- uint16_t vmcs_version;
- void __user *user_ptr;
-
if (cap->flags)
return -EINVAL;
switch (cap->cap) {
+#ifdef CONFIG_KVM_HYPERV
case KVM_CAP_HYPERV_SYNIC2:
if (cap->args[0])
return -EINVAL;
@@ -5718,16 +5764,22 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
- if (!kvm_x86_ops.nested_ops->enable_evmcs)
- return -ENOTTY;
- r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
- if (!r) {
- user_ptr = (void __user *)(uintptr_t)cap->args[0];
- if (copy_to_user(user_ptr, &vmcs_version,
- sizeof(vmcs_version)))
- r = -EFAULT;
+ {
+ int r;
+ uint16_t vmcs_version;
+ void __user *user_ptr;
+
+ if (!kvm_x86_ops.nested_ops->enable_evmcs)
+ return -ENOTTY;
+ r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
+ if (!r) {
+ user_ptr = (void __user *)(uintptr_t)cap->args[0];
+ if (copy_to_user(user_ptr, &vmcs_version,
+ sizeof(vmcs_version)))
+ r = -EFAULT;
+ }
+ return r;
}
- return r;
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
if (!kvm_x86_ops.enable_l2_tlb_flush)
return -ENOTTY;
@@ -5736,6 +5788,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_HYPERV_ENFORCE_CPUID:
return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]);
+#endif
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
vcpu->arch.pv_cpuid.enforce = cap->args[0];
@@ -6128,9 +6181,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
+#ifdef CONFIG_KVM_HYPERV
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
break;
+#endif
#ifdef CONFIG_KVM_XEN
case KVM_XEN_VCPU_GET_ATTR: {
struct kvm_xen_vcpu_attr xva;
@@ -7188,6 +7243,7 @@ set_pit2_out:
r = static_call(kvm_x86_mem_enc_unregister_region)(kvm, &region);
break;
}
+#ifdef CONFIG_KVM_HYPERV
case KVM_HYPERV_EVENTFD: {
struct kvm_hyperv_eventfd hvevfd;
@@ -7197,6 +7253,7 @@ set_pit2_out:
r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
break;
}
+#endif
case KVM_SET_PMU_EVENT_FILTER:
r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
break;
@@ -8432,6 +8489,15 @@ static void emulator_vm_bugged(struct x86_emulate_ctxt *ctxt)
kvm_vm_bugged(kvm);
}
+static gva_t emulator_get_untagged_addr(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, unsigned int flags)
+{
+ if (!kvm_x86_ops.get_untagged_addr)
+ return addr;
+
+ return static_call(kvm_x86_get_untagged_addr)(emul_to_vcpu(ctxt), addr, flags);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.vm_bugged = emulator_vm_bugged,
.read_gpr = emulator_read_gpr,
@@ -8476,6 +8542,7 @@ static const struct x86_emulate_ops emulate_ops = {
.leave_smm = emulator_leave_smm,
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
+ .get_untagged_addr = emulator_get_untagged_addr,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -10575,19 +10642,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
{
- u64 eoi_exit_bitmap[4];
-
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
+#ifdef CONFIG_KVM_HYPERV
if (to_hv_vcpu(vcpu)) {
+ u64 eoi_exit_bitmap[4];
+
bitmap_or((ulong *)eoi_exit_bitmap,
vcpu->arch.ioapic_handled_vectors,
to_hv_synic(vcpu)->vec_bitmap, 256);
static_call_cond(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
return;
}
-
+#endif
static_call_cond(kvm_x86_load_eoi_exitmap)(
vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
}
@@ -10678,9 +10746,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* the flushes are considered "remote" and not "local" because
* the requests can be initiated from other vCPUs.
*/
+#ifdef CONFIG_KVM_HYPERV
if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu) &&
kvm_hv_vcpu_flush_tlb(vcpu))
kvm_vcpu_flush_tlb_guest(vcpu);
+#endif
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -10733,6 +10803,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu_load_eoi_exitmap(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
kvm_vcpu_reload_apic_access_page(vcpu);
+#ifdef CONFIG_KVM_HYPERV
if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
@@ -10763,6 +10834,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
*/
if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
kvm_hv_process_stimers(vcpu);
+#endif
if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
kvm_vcpu_update_apicv(vcpu);
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
@@ -11081,6 +11153,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
@@ -11598,7 +11671,7 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
*/
if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
return false;
- if (kvm_vcpu_is_illegal_gpa(vcpu, sregs->cr3))
+ if (!kvm_vcpu_is_legal_cr3(vcpu, sregs->cr3))
return false;
} else {
/*
@@ -12207,7 +12280,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
}
if (!init_event) {
- kvm_pmu_reset(vcpu);
vcpu->arch.smbase = 0x30000;
vcpu->arch.msr_misc_features_enables = 0;
@@ -12424,7 +12496,9 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_free_vm(struct kvm *kvm)
{
- kfree(to_kvm_hv(kvm)->hv_pa_pg);
+#if IS_ENABLED(CONFIG_HYPERV)
+ kfree(kvm->arch.hv_pa_pg);
+#endif
__kvm_arch_free_vm(kvm);
}
@@ -12434,9 +12508,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
int ret;
unsigned long flags;
- if (type)
+ if (!kvm_is_vm_type_supported(type))
return -EINVAL;
+ kvm->arch.vm_type = type;
+
ret = kvm_page_track_init(kvm);
if (ret)
goto out;
@@ -12575,8 +12651,8 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
hva = slot->userspace_addr;
}
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_userspace_memory_region m;
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+ struct kvm_userspace_memory_region2 m;
m.slot = id | (i << 16);
m.flags = 0;
@@ -12726,6 +12802,10 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
}
}
+#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
+ kvm_mmu_init_memslot_memory_attributes(kvm, slot);
+#endif
+
if (kvm_page_track_create_memslot(kvm, slot, npages))
goto out_free;
@@ -13536,6 +13616,10 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
switch (type) {
case INVPCID_TYPE_INDIV_ADDR:
+ /*
+ * LAM doesn't apply to addresses that are inputs to TLB
+ * invalidation.
+ */
if ((!pcid_enabled && (operand.pcid != 0)) ||
is_noncanonical_address(operand.gla, vcpu)) {
kvm_inject_gp(vcpu, 0);
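The new kvm_is_vm_type_supported()/KVM_CAP_VM_TYPES plumbing above is what lets userspace discover and request KVM_X86_SW_PROTECTED_VM. A minimal userspace sketch, assuming a uapi header new enough to carry KVM_CAP_VM_TYPES and KVM_X86_SW_PROTECTED_VM (error handling kept to a minimum):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <err.h>

int create_sw_protected_vm(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	int types;

	if (kvm_fd < 0)
		err(1, "/dev/kvm");

	/* Returns a bitmask of supported VM types, BIT(type) per type. */
	types = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_VM_TYPES);
	if (!(types & (1 << KVM_X86_SW_PROTECTED_VM)))
		errx(1, "KVM_X86_SW_PROTECTED_VM not supported");

	/* On x86 the VM type is the KVM_CREATE_VM argument. */
	return ioctl(kvm_fd, KVM_CREATE_VM, KVM_X86_SW_PROTECTED_VM);
}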
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5184fde1dc54..2f7e19166658 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -530,6 +530,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
__reserved_bits |= X86_CR4_VMXE; \
if (!__cpu_has(__c, X86_FEATURE_PCID)) \
__reserved_bits |= X86_CR4_PCIDE; \
+ if (!__cpu_has(__c, X86_FEATURE_LAM)) \
+ __reserved_bits |= X86_CR4_LAM_SUP; \
__reserved_bits; \
})
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 523bb6df5ac9..4b4e738c6f1b 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1162,7 +1162,9 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
/* Only some feature flags need to be *enabled* by userspace */
u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
- KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+ KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+ KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+ u32 old_flags;
if (xhc->flags & ~permitted_flags)
return -EINVAL;
@@ -1183,9 +1185,14 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
+ old_flags = kvm->arch.xen_hvm_config.flags;
memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
mutex_unlock(&kvm->arch.xen.xen_lock);
+
+ if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
+
return 0;
}
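The new KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag is a per-VM opt-in that userspace sets through the existing KVM_XEN_HVM_CONFIG ioctl; toggling it forces a clock update on every vCPU so the pvclock flags get republished. A minimal sketch of how a VMM might set it, assuming vm_fd is an open VM file descriptor; note the ioctl replaces the whole config, so a real VMM would carry along any flags and MSR settings it already relies on:

#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <err.h>

void xen_force_tsc_unstable(int vm_fd)
{
	struct kvm_xen_hvm_config xhc = {
		.flags = KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE,
	};

	if (ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &xhc) < 0)
		err(1, "KVM_XEN_HVM_CONFIG");
}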
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ea2eb2ec90e2..55c4b07ec1f6 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -283,6 +283,9 @@ static int setup_mcfg_map(struct acpi_pci_root_info *ci)
info->mcfg_added = false;
seg = info->sd.domain;
+ dev_dbg(dev, "%s(%04x %pR ECAM %pa)\n", __func__, seg,
+ &root->secondary, &root->mcfg_addr);
+
/* return success if MMCFG is not in use */
if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg)
return 0;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 4b3efaa82ab7..0cc9520666ef 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * mmconfig-shared.c - Low-level direct PCI config space access via
- * MMCONFIG - common code between i386 and x86-64.
+ * Low-level direct PCI config space access via ECAM - common code between
+ * i386 and x86-64.
*
* This code does:
* - known chipset handling
@@ -11,6 +11,8 @@
* themselves.
*/
+#define pr_fmt(fmt) "PCI: " fmt
+
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/pci.h>
@@ -24,9 +26,7 @@
#include <asm/pci_x86.h>
#include <asm/acpi.h>
-#define PREFIX "PCI: "
-
-/* Indicate if the mmcfg resources have been placed into the resource table. */
+/* Indicate if the ECAM resources have been placed into the resource table */
static bool pci_mmcfg_running_state;
static bool pci_mmcfg_arch_init_failed;
static DEFINE_MUTEX(pci_mmcfg_lock);
@@ -90,7 +90,7 @@ static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
res->end = addr + PCI_MMCFG_BUS_OFFSET(end + 1) - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
- "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
+ "PCI ECAM %04x [bus %02x-%02x]", segment, start, end);
res->name = new->name;
return new;
@@ -102,16 +102,15 @@ struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
struct pci_mmcfg_region *new;
new = pci_mmconfig_alloc(segment, start, end, addr);
- if (new) {
- mutex_lock(&pci_mmcfg_lock);
- list_add_sorted(new);
- mutex_unlock(&pci_mmcfg_lock);
+ if (!new)
+ return NULL;
- pr_info(PREFIX
- "MMCONFIG for domain %04x [bus %02x-%02x] at %pR "
- "(base %#lx)\n",
- segment, start, end, &new->res, (unsigned long)addr);
- }
+ mutex_lock(&pci_mmcfg_lock);
+ list_add_sorted(new);
+ mutex_unlock(&pci_mmcfg_lock);
+
+ pr_info("ECAM %pR (base %#lx) for domain %04x [bus %02x-%02x]\n",
+ &new->res, (unsigned long)addr, segment, start, end);
return new;
}
@@ -205,7 +204,7 @@ static const char *__init pci_mmcfg_amd_fam10h(void)
msr <<= 32;
msr |= low;
- /* mmconfig is not enable */
+ /* ECAM is not enabled */
if (!(msr & FAM10H_MMIO_CONF_ENABLE))
return NULL;
@@ -367,7 +366,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
name = pci_mmcfg_probes[i].probe();
if (name)
- pr_info(PREFIX "%s with MMCONFIG support\n", name);
+ pr_info("%s with ECAM support\n", name);
}
/* some end_bus_number is crazy, fix it */
@@ -443,9 +442,11 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used)
return mcfg_res.flags;
}
-static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used)
+static bool is_efi_mmio(struct resource *res)
{
#ifdef CONFIG_EFI
+ u64 start = res->start;
+ u64 end = res->start + resource_size(res);
efi_memory_desc_t *md;
u64 size, mmio_start, mmio_end;
@@ -455,11 +456,6 @@ static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used)
mmio_start = md->phys_addr;
mmio_end = mmio_start + size;
- /*
- * N.B. Caller supplies (start, start + size),
- * so to match, mmio_end is the first address
- * *past* the EFI_MEMORY_MAPPED_IO area.
- */
if (mmio_start <= start && end <= mmio_end)
return true;
}
@@ -490,11 +486,10 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
return false;
if (dev)
- dev_info(dev, "MMCONFIG at %pR reserved as %s\n",
+ dev_info(dev, "ECAM %pR reserved as %s\n",
&cfg->res, method);
else
- pr_info(PREFIX "MMCONFIG at %pR reserved as %s\n",
- &cfg->res, method);
+ pr_info("ECAM %pR reserved as %s\n", &cfg->res, method);
if (old_size != size) {
/* update end_bus */
@@ -503,47 +498,51 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
cfg->res.end = cfg->res.start +
PCI_MMCFG_BUS_OFFSET(num_buses) - 1;
snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN,
- "PCI MMCONFIG %04x [bus %02x-%02x]",
+ "PCI ECAM %04x [bus %02x-%02x]",
cfg->segment, cfg->start_bus, cfg->end_bus);
if (dev)
- dev_info(dev,
- "MMCONFIG "
- "at %pR (base %#lx) (size reduced!)\n",
- &cfg->res, (unsigned long) cfg->address);
+ dev_info(dev, "ECAM %pR (base %#lx) (size reduced!)\n",
+ &cfg->res, (unsigned long) cfg->address);
else
- pr_info(PREFIX
- "MMCONFIG for %04x [bus%02x-%02x] "
- "at %pR (base %#lx) (size reduced!)\n",
- cfg->segment, cfg->start_bus, cfg->end_bus,
- &cfg->res, (unsigned long) cfg->address);
+ pr_info("ECAM %pR (base %#lx) for %04x [bus%02x-%02x] (size reduced!)\n",
+ &cfg->res, (unsigned long) cfg->address,
+ cfg->segment, cfg->start_bus, cfg->end_bus);
}
return true;
}
-static bool __ref
-pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early)
+static bool __ref pci_mmcfg_reserved(struct device *dev,
+ struct pci_mmcfg_region *cfg, int early)
{
+ struct resource *conflict;
+
if (!early && !acpi_disabled) {
if (is_mmconf_reserved(is_acpi_reserved, cfg, dev,
"ACPI motherboard resource"))
return true;
if (dev)
- dev_info(dev, FW_INFO
- "MMCONFIG at %pR not reserved in "
- "ACPI motherboard resources\n",
+ dev_info(dev, FW_INFO "ECAM %pR not reserved in ACPI motherboard resources\n",
&cfg->res);
else
- pr_info(FW_INFO PREFIX
- "MMCONFIG at %pR not reserved in "
- "ACPI motherboard resources\n",
- &cfg->res);
-
- if (is_mmconf_reserved(is_efi_mmio, cfg, dev,
- "EfiMemoryMappedIO"))
+ pr_info(FW_INFO "ECAM %pR not reserved in ACPI motherboard resources\n",
+ &cfg->res);
+
+ if (is_efi_mmio(&cfg->res)) {
+ pr_info("ECAM %pR is EfiMemoryMappedIO; assuming valid\n",
+ &cfg->res);
+ conflict = insert_resource_conflict(&iomem_resource,
+ &cfg->res);
+ if (conflict)
+ pr_warn("ECAM %pR conflicts with %s %pR\n",
+ &cfg->res, conflict->name, conflict);
+ else
+ pr_info("ECAM %pR reserved to work around lack of ACPI motherboard _CRS\n",
+ &cfg->res);
return true;
+ }
}
/*
@@ -569,30 +568,31 @@ static void __init pci_mmcfg_reject_broken(int early)
struct pci_mmcfg_region *cfg;
list_for_each_entry(cfg, &pci_mmcfg_list, list) {
- if (pci_mmcfg_check_reserved(NULL, cfg, early) == 0) {
- pr_info(PREFIX "not using MMCONFIG\n");
+ if (!pci_mmcfg_reserved(NULL, cfg, early)) {
+ pr_info("not using ECAM (%pR not reserved)\n",
+ &cfg->res);
free_all_mmcfg();
return;
}
}
}
-static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
- struct acpi_mcfg_allocation *cfg)
+static bool __init acpi_mcfg_valid_entry(struct acpi_table_mcfg *mcfg,
+ struct acpi_mcfg_allocation *cfg)
{
if (cfg->address < 0xFFFFFFFF)
- return 0;
+ return true;
if (!strncmp(mcfg->header.oem_id, "SGI", 3))
- return 0;
+ return true;
if ((mcfg->header.revision >= 1) && (dmi_get_bios_year() >= 2010))
- return 0;
+ return true;
- pr_err(PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx "
- "is above 4GB, ignored\n", cfg->pci_segment,
- cfg->start_bus_number, cfg->end_bus_number, cfg->address);
- return -EINVAL;
+ pr_err("ECAM at %#llx for %04x [bus %02x-%02x] is above 4GB, ignored\n",
+ cfg->address, cfg->pci_segment, cfg->start_bus_number,
+ cfg->end_bus_number);
+ return false;
}
static int __init pci_parse_mcfg(struct acpi_table_header *header)
@@ -616,21 +616,21 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
i -= sizeof(struct acpi_mcfg_allocation);
}
if (entries == 0) {
- pr_err(PREFIX "MMCONFIG has no entries\n");
+ pr_err("MCFG has no entries\n");
return -ENODEV;
}
cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
for (i = 0; i < entries; i++) {
cfg = &cfg_table[i];
- if (acpi_mcfg_check_entry(mcfg, cfg)) {
+ if (!acpi_mcfg_valid_entry(mcfg, cfg)) {
free_all_mmcfg();
return -ENODEV;
}
if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
cfg->end_bus_number, cfg->address) == NULL) {
- pr_warn(PREFIX "no memory for MCFG entries\n");
+ pr_warn("no memory for MCFG entries\n");
free_all_mmcfg();
return -ENOMEM;
}
@@ -667,6 +667,8 @@ static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size,
static void __init __pci_mmcfg_init(int early)
{
+ pr_debug("%s(%s)\n", __func__, early ? "early" : "late");
+
pci_mmcfg_reject_broken(early);
if (list_empty(&pci_mmcfg_list))
return;
@@ -693,6 +695,8 @@ static int __initdata known_bridge;
void __init pci_mmcfg_early_init(void)
{
+ pr_debug("%s() pci_probe %#x\n", __func__, pci_probe);
+
if (pci_probe & PCI_PROBE_MMCONF) {
if (pci_mmcfg_check_hostbridge())
known_bridge = 1;
@@ -706,14 +710,16 @@ void __init pci_mmcfg_early_init(void)
void __init pci_mmcfg_late_init(void)
{
- /* MMCONFIG disabled */
+ pr_debug("%s() pci_probe %#x\n", __func__, pci_probe);
+
+ /* ECAM disabled */
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return;
if (known_bridge)
return;
- /* MMCONFIG hasn't been enabled yet, try again */
+ /* ECAM hasn't been enabled yet, try again */
if (pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF) {
acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
__pci_mmcfg_init(0);
@@ -726,7 +732,9 @@ static int __init pci_mmcfg_late_insert_resources(void)
pci_mmcfg_running_state = true;
- /* If we are not using MMCONFIG, don't insert the resources. */
+ pr_debug("%s() pci_probe %#x\n", __func__, pci_probe);
+
+ /* If we are not using ECAM, don't insert the resources. */
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return 1;
@@ -735,21 +743,24 @@ static int __init pci_mmcfg_late_insert_resources(void)
* marked so it won't cause request errors when __request_region is
* called.
*/
- list_for_each_entry(cfg, &pci_mmcfg_list, list)
- if (!cfg->res.parent)
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+ if (!cfg->res.parent) {
+ pr_debug("%s() insert %pR\n", __func__, &cfg->res);
insert_resource(&iomem_resource, &cfg->res);
+ }
+ }
return 0;
}
/*
- * Perform MMCONFIG resource insertion after PCI initialization to allow for
+ * Perform ECAM resource insertion after PCI initialization to allow for
* misprogrammed MCFG tables that state larger sizes but actually conflict
* with other system resources.
*/
late_initcall(pci_mmcfg_late_insert_resources);
-/* Add MMCFG information for host bridges */
+/* Add ECAM information for host bridges */
int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
phys_addr_t addr)
{
@@ -757,6 +768,8 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
struct resource *tmp = NULL;
struct pci_mmcfg_region *cfg;
+ dev_dbg(dev, "%s(%04x [bus %02x-%02x])\n", __func__, seg, start, end);
+
if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed)
return -ENODEV;
@@ -767,15 +780,17 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
cfg = pci_mmconfig_lookup(seg, start);
if (cfg) {
if (cfg->end_bus < end)
- dev_info(dev, FW_INFO
- "MMCONFIG for "
- "domain %04x [bus %02x-%02x] "
- "only partially covers this bridge\n",
- cfg->segment, cfg->start_bus, cfg->end_bus);
+ dev_info(dev, FW_INFO "ECAM %pR for domain %04x [bus %02x-%02x] only partially covers this bridge\n",
+ &cfg->res, cfg->segment, cfg->start_bus,
+ cfg->end_bus);
mutex_unlock(&pci_mmcfg_lock);
return -EEXIST;
}
+ /*
+ * Don't move earlier; we must return -EEXIST, not -EINVAL, if
+ * pci_mmconfig_lookup() finds something
+ */
if (!addr) {
mutex_unlock(&pci_mmcfg_lock);
return -EINVAL;
@@ -784,10 +799,10 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
rc = -EBUSY;
cfg = pci_mmconfig_alloc(seg, start, end, addr);
if (cfg == NULL) {
- dev_warn(dev, "fail to add MMCONFIG (out of memory)\n");
+ dev_warn(dev, "fail to add ECAM (out of memory)\n");
rc = -ENOMEM;
- } else if (!pci_mmcfg_check_reserved(dev, cfg, 0)) {
- dev_warn(dev, FW_BUG "MMCONFIG %pR isn't reserved\n",
+ } else if (!pci_mmcfg_reserved(dev, cfg, 0)) {
+ dev_warn(dev, FW_BUG "ECAM %pR isn't reserved\n",
&cfg->res);
} else {
/* Insert resource if it's not in boot stage */
@@ -796,16 +811,13 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
&cfg->res);
if (tmp) {
- dev_warn(dev,
- "MMCONFIG %pR conflicts with "
- "%s %pR\n",
+ dev_warn(dev, "ECAM %pR conflicts with %s %pR\n",
&cfg->res, tmp->name, tmp);
} else if (pci_mmcfg_arch_map(cfg)) {
- dev_warn(dev, "fail to map MMCONFIG %pR.\n",
- &cfg->res);
+ dev_warn(dev, "fail to map ECAM %pR\n", &cfg->res);
} else {
list_add_sorted(cfg);
- dev_info(dev, "MMCONFIG at %pR (base %#lx)\n",
+ dev_info(dev, "ECAM %pR (base %#lx)\n",
&cfg->res, (unsigned long)addr);
cfg = NULL;
rc = 0;
@@ -823,7 +835,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
return rc;
}
-/* Delete MMCFG information for host bridges */
+/* Delete ECAM information for host bridges */
int pci_mmconfig_delete(u16 seg, u8 start, u8 end)
{
struct pci_mmcfg_region *cfg;
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index bfa789875322..f9ef97c593cf 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -131,7 +131,7 @@ const struct pci_raw_ops pci_mmcfg = {
int __init pci_mmcfg_arch_init(void)
{
- printk(KERN_INFO "PCI: Using MMCONFIG for extended config space\n");
+ printk(KERN_INFO "PCI: Using ECAM for extended config space\n");
raw_pci_ext_ops = &pci_mmcfg;
return 1;
}
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 0c7b6e66c644..cb5aa79a759e 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -6,6 +6,8 @@
* space mapped. This allows lockless config space operation.
*/
+#define pr_fmt(fmt) "PCI: " fmt
+
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/acpi.h>
@@ -14,8 +16,6 @@
#include <asm/e820/api.h>
#include <asm/pci_x86.h>
-#define PREFIX "PCI: "
-
static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
{
struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
@@ -111,6 +111,25 @@ static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)
return addr;
}
+int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
+{
+ cfg->virt = mcfg_ioremap(cfg);
+ if (!cfg->virt) {
+ pr_err("can't map ECAM at %pR\n", &cfg->res);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg)
+{
+ if (cfg && cfg->virt) {
+ iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus));
+ cfg->virt = NULL;
+ }
+}
+
int __init pci_mmcfg_arch_init(void)
{
struct pci_mmcfg_region *cfg;
@@ -133,22 +152,3 @@ void __init pci_mmcfg_arch_free(void)
list_for_each_entry(cfg, &pci_mmcfg_list, list)
pci_mmcfg_arch_unmap(cfg);
}
-
-int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
-{
- cfg->virt = mcfg_ioremap(cfg);
- if (!cfg->virt) {
- pr_err(PREFIX "can't map MMCONFIG at %pR\n", &cfg->res);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg)
-{
- if (cfg && cfg->virt) {
- iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus));
- cfg->virt = NULL;
- }
-}
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 4f15280732ed..244c643bb0b5 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -3,6 +3,8 @@
* BIOS32 and PCI BIOS handling.
*/
+#include <linux/bits.h>
+#include <linux/bitfield.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -29,8 +31,19 @@
#define PCIBIOS_HW_TYPE1_SPEC 0x10
#define PCIBIOS_HW_TYPE2_SPEC 0x20
+/*
+ * Returned in EAX:
+ * - AH: return code
+ */
+#define PCIBIOS_RETURN_CODE GENMASK(15, 8)
+
int pcibios_enabled;
+static u8 pcibios_get_return_code(u32 eax)
+{
+ return FIELD_GET(PCIBIOS_RETURN_CODE, eax);
+}
+
/* According to the BIOS specification at:
* http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
* restrict the x zone to some pages and make it ro. But this may be
@@ -154,7 +167,7 @@ static int __init check_pcibios(void)
: "memory");
local_irq_restore(flags);
- status = (eax >> 8) & 0xff;
+ status = pcibios_get_return_code(eax);
hw_mech = eax & 0xff;
major_ver = (ebx >> 8) & 0xff;
minor_ver = ebx & 0xff;
@@ -227,7 +240,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
raw_spin_unlock_irqrestore(&pci_config_lock, flags);
- return (int)((result & 0xff00) >> 8);
+ return pcibios_get_return_code(result);
}
static int pci_bios_write(unsigned int seg, unsigned int bus,
@@ -269,7 +282,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus,
raw_spin_unlock_irqrestore(&pci_config_lock, flags);
- return (int)((result & 0xff00) >> 8);
+ return pcibios_get_return_code(result);
}
@@ -385,9 +398,10 @@ struct irq_routing_table * pcibios_get_irq_routing_table(void)
"m" (opt)
: "memory");
DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map);
- if (ret & 0xff00)
- printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
- else if (opt.size) {
+ ret = pcibios_get_return_code(ret);
+ if (ret) {
+ printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", ret);
+ } else if (opt.size) {
rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
if (rt) {
memset(rt, 0, sizeof(struct irq_routing_table));
@@ -415,7 +429,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
"b" ((dev->bus->number << 8) | dev->devfn),
"c" ((irq << 8) | (pin + 10)),
"S" (&pci_indirect));
- return !(ret & 0xff00);
+ return pcibios_get_return_code(ret) == PCIBIOS_SUCCESSFUL;
}
EXPORT_SYMBOL(pcibios_set_irq_routing);
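The pcbios.c conversion funnels every open-coded (eax >> 8) & 0xff extraction through pcibios_get_return_code(). For reference, the snippet below (not from the patch; names are illustrative) spells out why the GENMASK()/FIELD_GET() form yields the same value:

#include <linux/bits.h>
#include <linux/bitfield.h>

/* Bits 15:8 of EAX hold AH, where the PCI BIOS puts its return code. */
#define EXAMPLE_RETURN_CODE	GENMASK(15, 8)

static inline u8 example_return_code(u32 eax)
{
	/*
	 * FIELD_GET() masks with GENMASK(15, 8) and shifts right by the
	 * mask's lowest set bit (8), i.e. the same value the old
	 * open-coded (eax >> 8) & 0xff produced.
	 */
	return FIELD_GET(EXAMPLE_RETURN_CODE, eax);
}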
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 6523eb7c3bd1..6052200fe925 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -168,8 +168,8 @@ do { \
(pr_reg)[18] = (_regs)->regs.gp[18]; \
(pr_reg)[19] = (_regs)->regs.gp[19]; \
(pr_reg)[20] = (_regs)->regs.gp[20]; \
- (pr_reg)[21] = current->thread.arch.fs; \
- (pr_reg)[22] = 0; \
+ (pr_reg)[21] = (_regs)->regs.gp[21]; \
+ (pr_reg)[22] = (_regs)->regs.gp[22]; \
(pr_reg)[23] = 0; \
(pr_reg)[24] = 0; \
(pr_reg)[25] = 0; \
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h
index 1ef9c21877bc..f90159508936 100644
--- a/arch/x86/um/asm/processor_64.h
+++ b/arch/x86/um/asm/processor_64.h
@@ -10,13 +10,11 @@
struct arch_thread {
unsigned long debugregs[8];
int debugregs_seq;
- unsigned long fs;
struct faultinfo faultinfo;
};
#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \
.debugregs_seq = 0, \
- .fs = 0, \
.faultinfo = { 0, 0, 0 } }
#define STACKSLOTS_PER_LINE 4
@@ -28,7 +26,6 @@ static inline void arch_flush_thread(struct arch_thread *thread)
static inline void arch_copy_thread(struct arch_thread *from,
struct arch_thread *to)
{
- to->fs = from->fs;
}
#define current_sp() ({ void *sp; __asm__("movq %%rsp, %0" : "=r" (sp) : ); sp; })
diff --git a/arch/x86/um/os-Linux/Makefile b/arch/x86/um/os-Linux/Makefile
index ae169125d03f..5249bbc30dcd 100644
--- a/arch/x86/um/os-Linux/Makefile
+++ b/arch/x86/um/os-Linux/Makefile
@@ -6,7 +6,6 @@
obj-y = registers.o task_size.o mcontext.o
obj-$(CONFIG_X86_32) += tls.o
-obj-$(CONFIG_64BIT) += prctl.o
USER_OBJS := $(obj-y)
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c
deleted file mode 100644
index 8431e87ac333..000000000000
--- a/arch/x86/um/os-Linux/prctl.c
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com})
- * Licensed under the GPL
- */
-
-#include <sys/ptrace.h>
-#include <asm/ptrace.h>
-
-int os_arch_prctl(int pid, int option, unsigned long *arg2)
-{
- return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
-}
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 0bc4b73a9cde..7f1abde2c84b 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -25,30 +25,6 @@ void arch_switch_to(struct task_struct *to)
printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n");
}
-int is_syscall(unsigned long addr)
-{
- unsigned short instr;
- int n;
-
- n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
- if (n) {
- /* access_process_vm() grants access to vsyscall and stub,
- * while copy_from_user doesn't. Maybe access_process_vm is
- * slow, but that doesn't matter, since it will be called only
- * in case of singlestepping, if copy_from_user failed.
- */
- n = access_process_vm(current, addr, &instr, sizeof(instr),
- FOLL_FORCE);
- if (n != sizeof(instr)) {
- printk(KERN_ERR "is_syscall : failed to read "
- "instruction from 0x%lx\n", addr);
- return 1;
- }
- }
- /* int 0x80 or sysenter */
- return (instr == 0x80cd) || (instr == 0x340f);
-}
-
/* determines which flags the user has access to. */
/* 1 = access 0 = no access */
#define FLAG_MASK 0x00044dd5
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index 289d0159b041..aa68d83d3f44 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -188,32 +188,6 @@ int peek_user(struct task_struct *child, long addr, long data)
return put_user(tmp, (unsigned long *) data);
}
-/* XXX Mostly copied from sys-i386 */
-int is_syscall(unsigned long addr)
-{
- unsigned short instr;
- int n;
-
- n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
- if (n) {
- /*
- * access_process_vm() grants access to vsyscall and stub,
- * while copy_from_user doesn't. Maybe access_process_vm is
- * slow, but that doesn't matter, since it will be called only
- * in case of singlestepping, if copy_from_user failed.
- */
- n = access_process_vm(current, addr, &instr, sizeof(instr),
- FOLL_FORCE);
- if (n != sizeof(instr)) {
- printk("is_syscall : failed to read instruction from "
- "0x%lx\n", addr);
- return 1;
- }
- }
- /* sysenter */
- return instr == 0x050f;
-}
-
static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
diff --git a/arch/x86/um/shared/sysdep/ptrace_32.h b/arch/x86/um/shared/sysdep/ptrace_32.h
index db8478a83a09..0c4989842fbe 100644
--- a/arch/x86/um/shared/sysdep/ptrace_32.h
+++ b/arch/x86/um/shared/sysdep/ptrace_32.h
@@ -8,10 +8,6 @@
#define MAX_FP_NR HOST_FPX_SIZE
-void set_using_sysemu(int value);
-int get_using_sysemu(void);
-extern int sysemu_supported;
-
#define UPT_SYSCALL_ARG1(r) UPT_BX(r)
#define UPT_SYSCALL_ARG2(r) UPT_CX(r)
#define UPT_SYSCALL_ARG3(r) UPT_DX(r)
diff --git a/arch/x86/um/shared/sysdep/ptrace_user.h b/arch/x86/um/shared/sysdep/ptrace_user.h
index 44782bbad41e..1d1a824fa652 100644
--- a/arch/x86/um/shared/sysdep/ptrace_user.h
+++ b/arch/x86/um/shared/sysdep/ptrace_user.h
@@ -15,14 +15,12 @@
#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? HOST_FPX_SIZE : HOST_FP_SIZE)
#else
#define FP_SIZE HOST_FP_SIZE
+#endif
/*
- * x86_64 FC3 doesn't define this in /usr/include/linux/ptrace.h even though
- * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the
- * 2.4 name and value for 2.4 host compatibility.
+ * glibc before 2.27 does not include PTRACE_SYSEMU_SINGLESTEP in its enum;
+ * ensure we have a definition by (re-)defining it here.
*/
-#ifndef PTRACE_OLDSETOPTIONS
-#define PTRACE_OLDSETOPTIONS 21
-#endif
-
+#ifndef PTRACE_SYSEMU_SINGLESTEP
+#define PTRACE_SYSEMU_SINGLESTEP 32
#endif
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index 38fa894b65d0..ea8b5a2d67af 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -12,72 +12,79 @@
#define STUB_MMAP_NR __NR_mmap2
#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT)
-static inline long stub_syscall0(long syscall)
+static __always_inline long stub_syscall0(long syscall)
{
long ret;
- __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall));
+ __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall)
+ : "memory");
return ret;
}
-static inline long stub_syscall1(long syscall, long arg1)
+static __always_inline long stub_syscall1(long syscall, long arg1)
{
long ret;
- __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1));
+ __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1)
+ : "memory");
return ret;
}
-static inline long stub_syscall2(long syscall, long arg1, long arg2)
+static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
- "c" (arg2));
+ "c" (arg2)
+ : "memory");
return ret;
}
-static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+static __always_inline long stub_syscall3(long syscall, long arg1, long arg2,
+ long arg3)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
- "c" (arg2), "d" (arg3));
+ "c" (arg2), "d" (arg3)
+ : "memory");
return ret;
}
-static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
- long arg4)
+static __always_inline long stub_syscall4(long syscall, long arg1, long arg2,
+ long arg3, long arg4)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
- "c" (arg2), "d" (arg3), "S" (arg4));
+ "c" (arg2), "d" (arg3), "S" (arg4)
+ : "memory");
return ret;
}
-static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
- long arg4, long arg5)
+static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
+ long arg3, long arg4, long arg5)
{
long ret;
__asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1),
- "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5));
+ "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
+ : "memory");
return ret;
}
-static inline void trap_myself(void)
+static __always_inline void trap_myself(void)
{
__asm("int3");
}
-static inline void remap_stack_and_trap(void)
+static __always_inline void remap_stack_and_trap(void)
{
__asm__ volatile (
"movl %%esp,%%ebx ;"
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 2de1c8f88173..b24168ef0ac4 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -16,7 +16,7 @@
#define __syscall_clobber "r11","rcx","memory"
#define __syscall "syscall"
-static inline long stub_syscall0(long syscall)
+static __always_inline long stub_syscall0(long syscall)
{
long ret;
@@ -27,7 +27,7 @@ static inline long stub_syscall0(long syscall)
return ret;
}
-static inline long stub_syscall2(long syscall, long arg1, long arg2)
+static __always_inline long stub_syscall2(long syscall, long arg1, long arg2)
{
long ret;
@@ -38,7 +38,8 @@ static inline long stub_syscall2(long syscall, long arg1, long arg2)
return ret;
}
-static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
+static __always_inline long stub_syscall3(long syscall, long arg1, long arg2,
+ long arg3)
{
long ret;
@@ -50,7 +51,7 @@ static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3)
return ret;
}
-static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
+static __always_inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
long arg4)
{
long ret;
@@ -64,8 +65,8 @@ static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3,
return ret;
}
-static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
- long arg4, long arg5)
+static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
+ long arg3, long arg4, long arg5)
{
long ret;
@@ -78,12 +79,12 @@ static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3,
return ret;
}
-static inline void trap_myself(void)
+static __always_inline void trap_myself(void)
{
__asm("int3");
}
-static inline void remap_stack_and_trap(void)
+static __always_inline void remap_stack_and_trap(void)
{
__asm__ volatile (
"movq %0,%%rax ;"
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index 27b29ae6c471..6a00a28c9cca 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -16,60 +16,24 @@
long arch_prctl(struct task_struct *task, int option,
unsigned long __user *arg2)
{
- unsigned long *ptr = arg2, tmp;
- long ret;
- int pid = task->mm->context.id.u.pid;
-
- /*
- * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to
- * be safe), we need to call arch_prctl on the host because
- * setting %fs may result in something else happening (like a
- * GDT or thread.fs being set instead). So, we let the host
- * fiddle the registers and thread struct and restore the
- * registers afterwards.
- *
- * So, the saved registers are stored to the process (this
- * needed because a stub may have been the last thing to run),
- * arch_prctl is run on the host, then the registers are read
- * back.
- */
- switch (option) {
- case ARCH_SET_FS:
- case ARCH_SET_GS:
- ret = restore_pid_registers(pid, &current->thread.regs.regs);
- if (ret)
- return ret;
- break;
- case ARCH_GET_FS:
- case ARCH_GET_GS:
- /*
- * With these two, we read to a local pointer and
- * put_user it to the userspace pointer that we were
- * given. If addr isn't valid (because it hasn't been
- * faulted in or is just bogus), we want put_user to
- * fault it in (or return -EFAULT) instead of having
- * the host return -EFAULT.
- */
- ptr = &tmp;
- }
-
- ret = os_arch_prctl(pid, option, ptr);
- if (ret)
- return ret;
+ long ret = -EINVAL;
switch (option) {
case ARCH_SET_FS:
- current->thread.arch.fs = (unsigned long) ptr;
- ret = save_registers(pid, &current->thread.regs.regs);
+ current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] =
+ (unsigned long) arg2;
+ ret = 0;
break;
case ARCH_SET_GS:
- ret = save_registers(pid, &current->thread.regs.regs);
+ current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)] =
+ (unsigned long) arg2;
+ ret = 0;
break;
case ARCH_GET_FS:
- ret = put_user(tmp, arg2);
+ ret = put_user(current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)], arg2);
break;
case ARCH_GET_GS:
- ret = put_user(tmp, arg2);
+ ret = put_user(current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)], arg2);
break;
}
@@ -83,10 +47,10 @@ SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
void arch_switch_to(struct task_struct *to)
{
- if ((to->thread.arch.fs == 0) || (to->mm == NULL))
- return;
-
- arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs);
+ /*
+ * Nothing needs to be done on x86_64.
+ * The FS_BASE/GS_BASE registers are saved in the ptrace register set.
+ */
}
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
index ebd3855d9b13..c51a613f6f5c 100644
--- a/arch/x86/um/tls_64.c
+++ b/arch/x86/um/tls_64.c
@@ -12,7 +12,7 @@ int arch_set_tls(struct task_struct *t, unsigned long tls)
* If CLONE_SETTLS is set, we need to save the thread id
* so it can be set during context switches.
*/
- t->thread.arch.fs = tls;
+ t->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] = tls;
return 0;
}
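The recurring gp[FS_BASE / sizeof(unsigned long)] pattern in the UML changes indexes the saved register array by the register's byte offset in the host register layout divided by the word size. A tiny illustrative helper, not part of the patch:

/* Illustrative only: FS_BASE is a byte offset, gp[] is indexed in words. */
static inline unsigned long example_saved_fs_base(const unsigned long *gp)
{
	return gp[FS_BASE / sizeof(unsigned long)];
}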
diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile
index 46ef8f73aebb..90da47eb85ee 100644
--- a/arch/x86/virt/vmx/tdx/Makefile
+++ b/arch/x86/virt/vmx/tdx/Makefile
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += seamcall.o
+obj-y += seamcall.o tdx.o
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
new file mode 100644
index 000000000000..4d6826a76f78
--- /dev/null
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -0,0 +1,1492 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(c) 2023 Intel Corporation.
+ *
+ * Intel Trusted Domain Extensions (TDX) support
+ */
+
+#define pr_fmt(fmt) "virt/tdx: " fmt
+
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/printk.h>
+#include <linux/cpu.h>
+#include <linux/spinlock.h>
+#include <linux/percpu-defs.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/memblock.h>
+#include <linux/memory.h>
+#include <linux/minmax.h>
+#include <linux/sizes.h>
+#include <linux/pfn.h>
+#include <linux/align.h>
+#include <linux/sort.h>
+#include <linux/log2.h>
+#include <linux/acpi.h>
+#include <linux/suspend.h>
+#include <linux/acpi.h>
+#include <asm/page.h>
+#include <asm/special_insns.h>
+#include <asm/msr-index.h>
+#include <asm/msr.h>
+#include <asm/cpufeature.h>
+#include <asm/tdx.h>
+#include <asm/intel-family.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+#include "tdx.h"
+
+static u32 tdx_global_keyid __ro_after_init;
+static u32 tdx_guest_keyid_start __ro_after_init;
+static u32 tdx_nr_guest_keyids __ro_after_init;
+
+static DEFINE_PER_CPU(bool, tdx_lp_initialized);
+
+static struct tdmr_info_list tdx_tdmr_list;
+
+static enum tdx_module_status_t tdx_module_status;
+static DEFINE_MUTEX(tdx_module_lock);
+
+/* All TDX-usable memory regions. Protected by mem_hotplug_lock. */
+static LIST_HEAD(tdx_memlist);
+
+typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
+
+static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
+{
+ pr_err("SEAMCALL (0x%016llx) failed: 0x%016llx\n", fn, err);
+}
+
+static inline void seamcall_err_ret(u64 fn, u64 err,
+ struct tdx_module_args *args)
+{
+ seamcall_err(fn, err, args);
+ pr_err("RCX 0x%016llx RDX 0x%016llx R08 0x%016llx\n",
+ args->rcx, args->rdx, args->r8);
+ pr_err("R09 0x%016llx R10 0x%016llx R11 0x%016llx\n",
+ args->r9, args->r10, args->r11);
+}
+
+static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func,
+ u64 fn, struct tdx_module_args *args)
+{
+ u64 sret = sc_retry(func, fn, args);
+
+ if (sret == TDX_SUCCESS)
+ return 0;
+
+ if (sret == TDX_SEAMCALL_VMFAILINVALID)
+ return -ENODEV;
+
+ if (sret == TDX_SEAMCALL_GP)
+ return -EOPNOTSUPP;
+
+ if (sret == TDX_SEAMCALL_UD)
+ return -EACCES;
+
+ err_func(fn, sret, args);
+ return -EIO;
+}
+
+#define seamcall_prerr(__fn, __args) \
+ sc_retry_prerr(__seamcall, seamcall_err, (__fn), (__args))
+
+#define seamcall_prerr_ret(__fn, __args) \
+ sc_retry_prerr(__seamcall_ret, seamcall_err_ret, (__fn), (__args))
+
+/*
+ * Do the module global initialization once and return its result.
+ * It can be done on any cpu. It's always called with interrupts
+ * disabled.
+ */
+static int try_init_module_global(void)
+{
+ struct tdx_module_args args = {};
+ static DEFINE_RAW_SPINLOCK(sysinit_lock);
+ static bool sysinit_done;
+ static int sysinit_ret;
+
+ lockdep_assert_irqs_disabled();
+
+ raw_spin_lock(&sysinit_lock);
+
+ if (sysinit_done)
+ goto out;
+
+ /* RCX is module attributes and all bits are reserved */
+ args.rcx = 0;
+ sysinit_ret = seamcall_prerr(TDH_SYS_INIT, &args);
+
+ /*
+ * The first SEAMCALL also detects the TDX module, thus
+ * it can fail if the TDX module is not loaded. Print a
+ * message to let the user know.
+ */
+ if (sysinit_ret == -ENODEV)
+ pr_err("module not loaded\n");
+
+ sysinit_done = true;
+out:
+ raw_spin_unlock(&sysinit_lock);
+ return sysinit_ret;
+}
+
+/**
+ * tdx_cpu_enable - Enable TDX on local cpu
+ *
+ * Do one-time TDX module per-cpu initialization SEAMCALL (and TDX module
+ * global initialization SEAMCALL if not done) on the local cpu to make
+ * this cpu ready to run any other SEAMCALLs.
+ *
+ * Always call this function via IPI function calls.
+ *
+ * Return 0 on success, otherwise errors.
+ */
+int tdx_cpu_enable(void)
+{
+ struct tdx_module_args args = {};
+ int ret;
+
+ if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
+ return -ENODEV;
+
+ lockdep_assert_irqs_disabled();
+
+ if (__this_cpu_read(tdx_lp_initialized))
+ return 0;
+
+ /*
+ * The TDX module global initialization is the very first step
+ * to enable TDX. Need to do it first (if hasn't been done)
+ * before the per-cpu initialization.
+ */
+ ret = try_init_module_global();
+ if (ret)
+ return ret;
+
+ ret = seamcall_prerr(TDH_SYS_LP_INIT, &args);
+ if (ret)
+ return ret;
+
+ __this_cpu_write(tdx_lp_initialized, true);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tdx_cpu_enable);
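tdx_cpu_enable() must run in IPI context, as its kernel-doc says. Purely as an illustration (not code from the patch, assuming <linux/smp.h> and <linux/atomic.h>, with CPU-hotplug locking omitted), a caller could fan it out to every online CPU like this:

static void example_cpu_enable(void *failed)
{
	/* Runs in IPI context, so interrupts are disabled as required. */
	if (tdx_cpu_enable())
		atomic_inc((atomic_t *)failed);
}

static int example_enable_all_cpus(void)
{
	atomic_t failed = ATOMIC_INIT(0);

	/* wait=1: return only after every CPU has run the callback. */
	on_each_cpu(example_cpu_enable, &failed, 1);

	return atomic_read(&failed) ? -EIO : 0;
}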
+
+/*
+ * Add a memory region as a TDX memory block. The caller must make sure
+ * all memory regions are added in address ascending order and don't
+ * overlap.
+ */
+static int add_tdx_memblock(struct list_head *tmb_list, unsigned long start_pfn,
+ unsigned long end_pfn, int nid)
+{
+ struct tdx_memblock *tmb;
+
+ tmb = kmalloc(sizeof(*tmb), GFP_KERNEL);
+ if (!tmb)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmb->list);
+ tmb->start_pfn = start_pfn;
+ tmb->end_pfn = end_pfn;
+ tmb->nid = nid;
+
+ /* @tmb_list is protected by mem_hotplug_lock */
+ list_add_tail(&tmb->list, tmb_list);
+ return 0;
+}
+
+static void free_tdx_memlist(struct list_head *tmb_list)
+{
+ /* @tmb_list is protected by mem_hotplug_lock */
+ while (!list_empty(tmb_list)) {
+ struct tdx_memblock *tmb = list_first_entry(tmb_list,
+ struct tdx_memblock, list);
+
+ list_del(&tmb->list);
+ kfree(tmb);
+ }
+}
+
+/*
+ * Ensure that all memblock memory regions are convertible to TDX
+ * memory. Once this has been established, stash the memblock
+ * ranges off in a secondary structure because memblock is modified
+ * in memory hotplug while TDX memory regions are fixed.
+ */
+static int build_tdx_memlist(struct list_head *tmb_list)
+{
+ unsigned long start_pfn, end_pfn;
+ int i, nid, ret;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+ /*
+ * The first 1MB is not reported as TDX convertible memory.
+ * Although the first 1MB is always reserved and won't end up
+ * in the page allocator, it is still in memblock's memory
+ * regions. Skip it manually to exclude it as TDX memory.
+ */
+ start_pfn = max(start_pfn, PHYS_PFN(SZ_1M));
+ if (start_pfn >= end_pfn)
+ continue;
+
+ /*
+ * Add the memory regions as TDX memory. The regions in
+ * memblock are already guaranteed to be in address
+ * ascending order and not to overlap.
+ */
+ ret = add_tdx_memblock(tmb_list, start_pfn, end_pfn, nid);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ free_tdx_memlist(tmb_list);
+ return ret;
+}
+
+static int read_sys_metadata_field(u64 field_id, u64 *data)
+{
+ struct tdx_module_args args = {};
+ int ret;
+
+ /*
+ * TDH.SYS.RD -- reads one global metadata field
+ * - RDX (in): the field to read
+ * - R8 (out): the field data
+ */
+ args.rdx = field_id;
+ ret = seamcall_prerr_ret(TDH_SYS_RD, &args);
+ if (ret)
+ return ret;
+
+ *data = args.r8;
+
+ return 0;
+}
+
+static int read_sys_metadata_field16(u64 field_id,
+ int offset,
+ struct tdx_tdmr_sysinfo *ts)
+{
+ u16 *ts_member = ((void *)ts) + offset;
+ u64 tmp;
+ int ret;
+
+ if (WARN_ON_ONCE(MD_FIELD_ID_ELE_SIZE_CODE(field_id) !=
+ MD_FIELD_ID_ELE_SIZE_16BIT))
+ return -EINVAL;
+
+ ret = read_sys_metadata_field(field_id, &tmp);
+ if (ret)
+ return ret;
+
+ *ts_member = tmp;
+
+ return 0;
+}
+
+struct field_mapping {
+ u64 field_id;
+ int offset;
+};
+
+#define TD_SYSINFO_MAP(_field_id, _offset) \
+ { .field_id = MD_FIELD_ID_##_field_id, \
+ .offset = offsetof(struct tdx_tdmr_sysinfo, _offset) }
+
+/* Map TD_SYSINFO fields into 'struct tdx_tdmr_sysinfo': */
+static const struct field_mapping fields[] = {
+ TD_SYSINFO_MAP(MAX_TDMRS, max_tdmrs),
+ TD_SYSINFO_MAP(MAX_RESERVED_PER_TDMR, max_reserved_per_tdmr),
+ TD_SYSINFO_MAP(PAMT_4K_ENTRY_SIZE, pamt_entry_size[TDX_PS_4K]),
+ TD_SYSINFO_MAP(PAMT_2M_ENTRY_SIZE, pamt_entry_size[TDX_PS_2M]),
+ TD_SYSINFO_MAP(PAMT_1G_ENTRY_SIZE, pamt_entry_size[TDX_PS_1G]),
+};
+
+static int get_tdx_tdmr_sysinfo(struct tdx_tdmr_sysinfo *tdmr_sysinfo)
+{
+ int ret;
+ int i;
+
+ /* Populate 'tdmr_sysinfo' fields using the mapping structure above: */
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ ret = read_sys_metadata_field16(fields[i].field_id,
+ fields[i].offset,
+ tdmr_sysinfo);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Calculate the actual TDMR size */
+static int tdmr_size_single(u16 max_reserved_per_tdmr)
+{
+ int tdmr_sz;
+
+ /*
+ * The actual size of a TDMR depends on the maximum
+ * number of reserved areas.
+ */
+ tdmr_sz = sizeof(struct tdmr_info);
+ tdmr_sz += sizeof(struct tdmr_reserved_area) * max_reserved_per_tdmr;
+
+ return ALIGN(tdmr_sz, TDMR_INFO_ALIGNMENT);
+}
+
+static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list,
+ struct tdx_tdmr_sysinfo *tdmr_sysinfo)
+{
+ size_t tdmr_sz, tdmr_array_sz;
+ void *tdmr_array;
+
+ tdmr_sz = tdmr_size_single(tdmr_sysinfo->max_reserved_per_tdmr);
+ tdmr_array_sz = tdmr_sz * tdmr_sysinfo->max_tdmrs;
+
+ /*
+ * To keep things simple, allocate all TDMRs together.
+ * The buffer needs to be physically contiguous to make
+ * sure each TDMR is physically contiguous.
+ */
+ tdmr_array = alloc_pages_exact(tdmr_array_sz,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!tdmr_array)
+ return -ENOMEM;
+
+ tdmr_list->tdmrs = tdmr_array;
+
+ /*
+ * Keep the size of one TDMR so that the target TDMR at
+ * a given index in the TDMR list can be located.
+ */
+ tdmr_list->tdmr_sz = tdmr_sz;
+ tdmr_list->max_tdmrs = tdmr_sysinfo->max_tdmrs;
+ tdmr_list->nr_consumed_tdmrs = 0;
+
+ return 0;
+}
+
+static void free_tdmr_list(struct tdmr_info_list *tdmr_list)
+{
+ free_pages_exact(tdmr_list->tdmrs,
+ tdmr_list->max_tdmrs * tdmr_list->tdmr_sz);
+}
+
+/* Get the TDMR from the list at the given index. */
+static struct tdmr_info *tdmr_entry(struct tdmr_info_list *tdmr_list,
+ int idx)
+{
+ int tdmr_info_offset = tdmr_list->tdmr_sz * idx;
+
+ return (void *)tdmr_list->tdmrs + tdmr_info_offset;
+}
+
+#define TDMR_ALIGNMENT SZ_1G
+#define TDMR_ALIGN_DOWN(_addr) ALIGN_DOWN((_addr), TDMR_ALIGNMENT)
+#define TDMR_ALIGN_UP(_addr) ALIGN((_addr), TDMR_ALIGNMENT)
+
+static inline u64 tdmr_end(struct tdmr_info *tdmr)
+{
+ return tdmr->base + tdmr->size;
+}
+
+/*
+ * Take the memory referenced in @tmb_list and populate the
+ * preallocated @tdmr_list, following all the special alignment
+ * and size rules for TDMR.
+ */
+static int fill_out_tdmrs(struct list_head *tmb_list,
+ struct tdmr_info_list *tdmr_list)
+{
+ struct tdx_memblock *tmb;
+ int tdmr_idx = 0;
+
+ /*
+ * Loop over TDX memory regions and fill out TDMRs to cover them.
+ * To keep it simple, always try to use one TDMR to cover one
+ * memory region.
+ *
+ * In practice TDX supports at least 64 TDMRs. A 2-socket system
+ * typically consumes fewer than 10 of those. This code is
+ * dumb and simple and may use more TDMRs than is strictly
+ * required.
+ */
+ list_for_each_entry(tmb, tmb_list, list) {
+ struct tdmr_info *tdmr = tdmr_entry(tdmr_list, tdmr_idx);
+ u64 start, end;
+
+ start = TDMR_ALIGN_DOWN(PFN_PHYS(tmb->start_pfn));
+ end = TDMR_ALIGN_UP(PFN_PHYS(tmb->end_pfn));
+
+ /*
+ * A valid size indicates the current TDMR has already
+ * been filled out to cover the previous memory region(s).
+ */
+ if (tdmr->size) {
+ /*
+ * Loop to the next if the current memory region
+ * has already been fully covered.
+ */
+ if (end <= tdmr_end(tdmr))
+ continue;
+
+ /* Otherwise, skip the already covered part. */
+ if (start < tdmr_end(tdmr))
+ start = tdmr_end(tdmr);
+
+ /*
+ * Create a new TDMR to cover the current memory
+ * region, or the remaining part of it.
+ */
+ tdmr_idx++;
+ if (tdmr_idx >= tdmr_list->max_tdmrs) {
+ pr_warn("initialization failed: TDMRs exhausted.\n");
+ return -ENOSPC;
+ }
+
+ tdmr = tdmr_entry(tdmr_list, tdmr_idx);
+ }
+
+ tdmr->base = start;
+ tdmr->size = end - start;
+ }
+
+ /* @tdmr_idx is always the index of the last valid TDMR. */
+ tdmr_list->nr_consumed_tdmrs = tdmr_idx + 1;
+
+ /*
+ * Warn early that the kernel is about to run out of TDMRs.
+ *
+ * This is an indication that the TDMR allocation has to be
+ * reworked to be smarter to avoid running into this limit.
+ */
+ if (tdmr_list->max_tdmrs - tdmr_list->nr_consumed_tdmrs < TDMR_NR_WARN)
+ pr_warn("consumed TDMRs reaching limit: %d used out of %d\n",
+ tdmr_list->nr_consumed_tdmrs,
+ tdmr_list->max_tdmrs);
+
+ return 0;
+}
+
+/*
+ * Calculate the PAMT size given a TDMR and a page size. The returned
+ * PAMT size is always aligned up to a 4K page boundary.
+ */
+static unsigned long tdmr_get_pamt_sz(struct tdmr_info *tdmr, int pgsz,
+ u16 pamt_entry_size)
+{
+ unsigned long pamt_sz, nr_pamt_entries;
+
+ switch (pgsz) {
+ case TDX_PS_4K:
+ nr_pamt_entries = tdmr->size >> PAGE_SHIFT;
+ break;
+ case TDX_PS_2M:
+ nr_pamt_entries = tdmr->size >> PMD_SHIFT;
+ break;
+ case TDX_PS_1G:
+ nr_pamt_entries = tdmr->size >> PUD_SHIFT;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ pamt_sz = nr_pamt_entries * pamt_entry_size;
+ /* TDX requires the PAMT size to be 4K aligned */
+ pamt_sz = ALIGN(pamt_sz, PAGE_SIZE);
+
+ return pamt_sz;
+}
+
+/*
+ * Locate a NUMA node which should hold the allocation of the @tdmr
+ * PAMT. This node will have some memory covered by the TDMR. The
+ * relative amount of memory covered is not considered.
+ */
+static int tdmr_get_nid(struct tdmr_info *tdmr, struct list_head *tmb_list)
+{
+ struct tdx_memblock *tmb;
+
+ /*
+ * A TDMR must cover at least part of one TMB. That TMB will end
+ * after the TDMR begins. But, that TMB may have started before
+ * the TDMR. Find the next 'tmb' that _ends_ after this TDMR
+ * begins. Ignore 'tmb' start addresses. They are irrelevant.
+ */
+ list_for_each_entry(tmb, tmb_list, list) {
+ if (tmb->end_pfn > PHYS_PFN(tdmr->base))
+ return tmb->nid;
+ }
+
+ /*
+ * Fall back to allocating the TDMR's metadata from node 0 when
+ * no TDX memory block can be found. This should never happen
+ * since TDMRs originate from TDX memory blocks.
+ */
+ pr_warn("TDMR [0x%llx, 0x%llx): unable to find local NUMA node for PAMT allocation, fallback to use node 0.\n",
+ tdmr->base, tdmr_end(tdmr));
+ return 0;
+}
+
+/*
+ * Allocate PAMTs from the local NUMA node of some memory in @tmb_list
+ * within @tdmr, and set up PAMTs for @tdmr.
+ */
+static int tdmr_set_up_pamt(struct tdmr_info *tdmr,
+ struct list_head *tmb_list,
+ u16 pamt_entry_size[])
+{
+ unsigned long pamt_base[TDX_PS_NR];
+ unsigned long pamt_size[TDX_PS_NR];
+ unsigned long tdmr_pamt_base;
+ unsigned long tdmr_pamt_size;
+ struct page *pamt;
+ int pgsz, nid;
+
+ nid = tdmr_get_nid(tdmr, tmb_list);
+
+ /*
+ * Calculate the PAMT size for each TDX supported page size
+ * and the total PAMT size.
+ */
+ tdmr_pamt_size = 0;
+ for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
+ pamt_size[pgsz] = tdmr_get_pamt_sz(tdmr, pgsz,
+ pamt_entry_size[pgsz]);
+ tdmr_pamt_size += pamt_size[pgsz];
+ }
+
+ /*
+ * Allocate one chunk of physically contiguous memory for all
+ * PAMTs. This helps minimize the PAMT's use of reserved areas
+ * in overlapped TDMRs.
+ */
+ pamt = alloc_contig_pages(tdmr_pamt_size >> PAGE_SHIFT, GFP_KERNEL,
+ nid, &node_online_map);
+ if (!pamt)
+ return -ENOMEM;
+
+ /*
+ * Break the contiguous allocation back up into the
+ * individual PAMTs for each page size.
+ */
+ tdmr_pamt_base = page_to_pfn(pamt) << PAGE_SHIFT;
+ for (pgsz = TDX_PS_4K; pgsz < TDX_PS_NR; pgsz++) {
+ pamt_base[pgsz] = tdmr_pamt_base;
+ tdmr_pamt_base += pamt_size[pgsz];
+ }
+
+ tdmr->pamt_4k_base = pamt_base[TDX_PS_4K];
+ tdmr->pamt_4k_size = pamt_size[TDX_PS_4K];
+ tdmr->pamt_2m_base = pamt_base[TDX_PS_2M];
+ tdmr->pamt_2m_size = pamt_size[TDX_PS_2M];
+ tdmr->pamt_1g_base = pamt_base[TDX_PS_1G];
+ tdmr->pamt_1g_size = pamt_size[TDX_PS_1G];
+
+ return 0;
+}
+
+static void tdmr_get_pamt(struct tdmr_info *tdmr, unsigned long *pamt_base,
+ unsigned long *pamt_size)
+{
+ unsigned long pamt_bs, pamt_sz;
+
+ /*
+ * The PAMT was allocated in one contiguous unit. The 4K PAMT
+ * should always point to the beginning of that allocation.
+ */
+ pamt_bs = tdmr->pamt_4k_base;
+ pamt_sz = tdmr->pamt_4k_size + tdmr->pamt_2m_size + tdmr->pamt_1g_size;
+
+ WARN_ON_ONCE((pamt_bs & ~PAGE_MASK) || (pamt_sz & ~PAGE_MASK));
+
+ *pamt_base = pamt_bs;
+ *pamt_size = pamt_sz;
+}
+
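+/* Invoke @pamt_func on the PAMT area of @tdmr, if one has been allocated */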
+static void tdmr_do_pamt_func(struct tdmr_info *tdmr,
+ void (*pamt_func)(unsigned long base, unsigned long size))
+{
+ unsigned long pamt_base, pamt_size;
+
+ tdmr_get_pamt(tdmr, &pamt_base, &pamt_size);
+
+ /* Do nothing if PAMT hasn't been allocated for this TDMR */
+ if (!pamt_size)
+ return;
+
+ if (WARN_ON_ONCE(!pamt_base))
+ return;
+
+ pamt_func(pamt_base, pamt_size);
+}
+
+static void free_pamt(unsigned long pamt_base, unsigned long pamt_size)
+{
+ free_contig_range(pamt_base >> PAGE_SHIFT, pamt_size >> PAGE_SHIFT);
+}
+
+static void tdmr_free_pamt(struct tdmr_info *tdmr)
+{
+ tdmr_do_pamt_func(tdmr, free_pamt);
+}
+
+static void tdmrs_free_pamt_all(struct tdmr_info_list *tdmr_list)
+{
+ int i;
+
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
+ tdmr_free_pamt(tdmr_entry(tdmr_list, i));
+}
+
+/* Allocate and set up PAMTs for all TDMRs */
+static int tdmrs_set_up_pamt_all(struct tdmr_info_list *tdmr_list,
+ struct list_head *tmb_list,
+ u16 pamt_entry_size[])
+{
+ int i, ret = 0;
+
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+ ret = tdmr_set_up_pamt(tdmr_entry(tdmr_list, i), tmb_list,
+ pamt_entry_size);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ tdmrs_free_pamt_all(tdmr_list);
+ return ret;
+}
+
+/*
+ * Convert TDX private pages back to normal by using MOVDIR64B to
+ * clear these pages. Note this function doesn't flush the cache of
+ * these TDX private pages. The caller must make sure that is done.
+ */
+static void reset_tdx_pages(unsigned long base, unsigned long size)
+{
+ const void *zero_page = (const void *)page_address(ZERO_PAGE(0));
+ unsigned long phys, end;
+
+ end = base + size;
+ for (phys = base; phys < end; phys += 64)
+ movdir64b(__va(phys), zero_page);
+
+ /*
+ * MOVDIR64B uses the WC protocol. Use a memory barrier to
+ * make sure any later user of these pages sees the
+ * updated data.
+ */
+ mb();
+}
+
+static void tdmr_reset_pamt(struct tdmr_info *tdmr)
+{
+ tdmr_do_pamt_func(tdmr, reset_tdx_pages);
+}
+
+static void tdmrs_reset_pamt_all(struct tdmr_info_list *tdmr_list)
+{
+ int i;
+
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
+ tdmr_reset_pamt(tdmr_entry(tdmr_list, i));
+}
+
+static unsigned long tdmrs_count_pamt_kb(struct tdmr_info_list *tdmr_list)
+{
+ unsigned long pamt_size = 0;
+ int i;
+
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+ unsigned long base, size;
+
+ tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
+ pamt_size += size;
+ }
+
+ return pamt_size / 1024;
+}
+
+static int tdmr_add_rsvd_area(struct tdmr_info *tdmr, int *p_idx, u64 addr,
+ u64 size, u16 max_reserved_per_tdmr)
+{
+ struct tdmr_reserved_area *rsvd_areas = tdmr->reserved_areas;
+ int idx = *p_idx;
+
+ /* Reserved area must be 4K aligned in offset and size */
+ if (WARN_ON(addr & ~PAGE_MASK || size & ~PAGE_MASK))
+ return -EINVAL;
+
+ if (idx >= max_reserved_per_tdmr) {
+ pr_warn("initialization failed: TDMR [0x%llx, 0x%llx): reserved areas exhausted.\n",
+ tdmr->base, tdmr_end(tdmr));
+ return -ENOSPC;
+ }
+
+ /*
+ * Consume one reserved area per call. Make no effort to
+ * optimize or reduce the number of reserved areas consumed,
+ * for instance by merging contiguous reserved areas.
+ */
+ rsvd_areas[idx].offset = addr - tdmr->base;
+ rsvd_areas[idx].size = size;
+
+ *p_idx = idx + 1;
+
+ return 0;
+}
+
+/*
+ * Go through @tmb_list to find holes between memory areas. If any of
+ * those holes fall within @tdmr, set up a TDMR reserved area to cover
+ * the hole.
+ */
+static int tdmr_populate_rsvd_holes(struct list_head *tmb_list,
+ struct tdmr_info *tdmr,
+ int *rsvd_idx,
+ u16 max_reserved_per_tdmr)
+{
+ struct tdx_memblock *tmb;
+ u64 prev_end;
+ int ret;
+
+ /*
+ * Start looking for reserved blocks at the
+ * beginning of the TDMR.
+ */
+ prev_end = tdmr->base;
+ list_for_each_entry(tmb, tmb_list, list) {
+ u64 start, end;
+
+ start = PFN_PHYS(tmb->start_pfn);
+ end = PFN_PHYS(tmb->end_pfn);
+
+ /* Break if this region is after the TDMR */
+ if (start >= tdmr_end(tdmr))
+ break;
+
+ /* Exclude regions before this TDMR */
+ if (end < tdmr->base)
+ continue;
+
+ /*
+ * Skip over memory areas that
+ * have already been dealt with.
+ */
+ if (start <= prev_end) {
+ prev_end = end;
+ continue;
+ }
+
+ /* Add the hole before this region */
+ ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
+ start - prev_end,
+ max_reserved_per_tdmr);
+ if (ret)
+ return ret;
+
+ prev_end = end;
+ }
+
+ /* Add the hole after the last region if it exists. */
+ if (prev_end < tdmr_end(tdmr)) {
+ ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, prev_end,
+ tdmr_end(tdmr) - prev_end,
+ max_reserved_per_tdmr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Go through @tdmr_list to find all PAMTs. If any of those PAMTs
+ * overlaps with @tdmr, set up a TDMR reserved area to cover the
+ * overlapping part.
+ */
+static int tdmr_populate_rsvd_pamts(struct tdmr_info_list *tdmr_list,
+ struct tdmr_info *tdmr,
+ int *rsvd_idx,
+ u16 max_reserved_per_tdmr)
+{
+ int i, ret;
+
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+ struct tdmr_info *tmp = tdmr_entry(tdmr_list, i);
+ unsigned long pamt_base, pamt_size, pamt_end;
+
+ tdmr_get_pamt(tmp, &pamt_base, &pamt_size);
+ /* Each TDMR must already have PAMT allocated */
+ WARN_ON_ONCE(!pamt_size || !pamt_base);
+
+ pamt_end = pamt_base + pamt_size;
+ /* Skip PAMTs outside of the given TDMR */
+ if ((pamt_end <= tdmr->base) ||
+ (pamt_base >= tdmr_end(tdmr)))
+ continue;
+
+ /* Only mark the part within the TDMR as reserved */
+ if (pamt_base < tdmr->base)
+ pamt_base = tdmr->base;
+ if (pamt_end > tdmr_end(tdmr))
+ pamt_end = tdmr_end(tdmr);
+
+ ret = tdmr_add_rsvd_area(tdmr, rsvd_idx, pamt_base,
+ pamt_end - pamt_base,
+ max_reserved_per_tdmr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Compare function called by sort() for TDMR reserved areas */
+static int rsvd_area_cmp_func(const void *a, const void *b)
+{
+ struct tdmr_reserved_area *r1 = (struct tdmr_reserved_area *)a;
+ struct tdmr_reserved_area *r2 = (struct tdmr_reserved_area *)b;
+
+ if (r1->offset + r1->size <= r2->offset)
+ return -1;
+ if (r1->offset >= r2->offset + r2->size)
+ return 1;
+
+ /* Reserved areas cannot overlap. The caller must guarantee this. */
+ WARN_ON_ONCE(1);
+ return -1;
+}
+
+/*
+ * Populate reserved areas for the given @tdmr, including memory holes
+ * (via @tmb_list) and PAMTs (via @tdmr_list).
+ */
+static int tdmr_populate_rsvd_areas(struct tdmr_info *tdmr,
+ struct list_head *tmb_list,
+ struct tdmr_info_list *tdmr_list,
+ u16 max_reserved_per_tdmr)
+{
+ int ret, rsvd_idx = 0;
+
+ ret = tdmr_populate_rsvd_holes(tmb_list, tdmr, &rsvd_idx,
+ max_reserved_per_tdmr);
+ if (ret)
+ return ret;
+
+ ret = tdmr_populate_rsvd_pamts(tdmr_list, tdmr, &rsvd_idx,
+ max_reserved_per_tdmr);
+ if (ret)
+ return ret;
+
+ /* TDX requires reserved areas listed in address ascending order */
+ sort(tdmr->reserved_areas, rsvd_idx, sizeof(struct tdmr_reserved_area),
+ rsvd_area_cmp_func, NULL);
+
+ return 0;
+}
+
+/*
+ * Populate reserved areas for all TDMRs in @tdmr_list, including memory
+ * holes (via @tmb_list) and PAMTs.
+ */
+static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list,
+ struct list_head *tmb_list,
+ u16 max_reserved_per_tdmr)
+{
+ int i;
+
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+ int ret;
+
+ ret = tdmr_populate_rsvd_areas(tdmr_entry(tdmr_list, i),
+ tmb_list, tdmr_list, max_reserved_per_tdmr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Construct a list of TDMRs on the preallocated space in @tdmr_list
+ * to cover all TDX memory regions in @tmb_list based on the TDX module
+ * TDMR global information in @tdmr_sysinfo.
+ */
+static int construct_tdmrs(struct list_head *tmb_list,
+ struct tdmr_info_list *tdmr_list,
+ struct tdx_tdmr_sysinfo *tdmr_sysinfo)
+{
+ int ret;
+
+ ret = fill_out_tdmrs(tmb_list, tdmr_list);
+ if (ret)
+ return ret;
+
+ ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list,
+ tdmr_sysinfo->pamt_entry_size);
+ if (ret)
+ return ret;
+
+ ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list,
+ tdmr_sysinfo->max_reserved_per_tdmr);
+ if (ret)
+ tdmrs_free_pamt_all(tdmr_list);
+
+ /*
+ * The tdmr_info_list is read-only from here on out.
+ * Ensure that these writes are seen by other CPUs.
+ * Pairs with a smp_rmb() in is_pamt_page().
+ */
+ smp_wmb();
+
+ return ret;
+}
+
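+/*
+ * Pass an array of TDMR physical addresses and the global KeyID to the
+ * TDX module via TDH.SYS.CONFIG.
+ */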
+static int config_tdx_module(struct tdmr_info_list *tdmr_list, u64 global_keyid)
+{
+ struct tdx_module_args args = {};
+ u64 *tdmr_pa_array;
+ size_t array_sz;
+ int i, ret;
+
+ /*
+ * TDMRs are passed to the TDX module via an array of physical
+ * addresses of each TDMR. The array itself also has a certain
+ * alignment requirement.
+ */
+ array_sz = tdmr_list->nr_consumed_tdmrs * sizeof(u64);
+ array_sz = roundup_pow_of_two(array_sz);
+ if (array_sz < TDMR_INFO_PA_ARRAY_ALIGNMENT)
+ array_sz = TDMR_INFO_PA_ARRAY_ALIGNMENT;
+
+ tdmr_pa_array = kzalloc(array_sz, GFP_KERNEL);
+ if (!tdmr_pa_array)
+ return -ENOMEM;
+
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++)
+ tdmr_pa_array[i] = __pa(tdmr_entry(tdmr_list, i));
+
+ args.rcx = __pa(tdmr_pa_array);
+ args.rdx = tdmr_list->nr_consumed_tdmrs;
+ args.r8 = global_keyid;
+ ret = seamcall_prerr(TDH_SYS_CONFIG, &args);
+
+ /* Free the array as it is not required anymore. */
+ kfree(tdmr_pa_array);
+
+ return ret;
+}
+
+static int do_global_key_config(void *unused)
+{
+ struct tdx_module_args args = {};
+
+ return seamcall_prerr(TDH_SYS_KEY_CONFIG, &args);
+}
+
+/*
+ * Attempt to configure the global KeyID on all physical packages.
+ *
+ * This requires running code on at least one CPU in each package.
+ * TDMR initialization will fail if any package in the
+ * system has no online CPUs.
+ *
+ * This code takes no affirmative steps to online CPUs. Callers (aka.
+ * KVM) can ensure success by ensuring sufficient CPUs are online and
+ * can run SEAMCALLs.
+ */
+static int config_global_keyid(void)
+{
+ cpumask_var_t packages;
+ int cpu, ret = -EINVAL;
+
+ if (!zalloc_cpumask_var(&packages, GFP_KERNEL))
+ return -ENOMEM;
+
+ /*
+ * Hardware doesn't guarantee cache coherency across different
+ * KeyIDs. The kernel needs to flush PAMT's dirty cachelines
+ * (associated with KeyID 0) before the TDX module can use the
+ * global KeyID to access the PAMT. Given PAMTs are potentially
+ * large (~1/256th of system RAM), just use WBINVD.
+ */
+ wbinvd_on_all_cpus();
+
+ for_each_online_cpu(cpu) {
+ /*
+ * The key configuration only needs to be done once per
+ * package and will return an error if configured more
+ * than once. Avoid doing it multiple times per package.
+ */
+ if (cpumask_test_and_set_cpu(topology_physical_package_id(cpu),
+ packages))
+ continue;
+
+ /*
+ * TDH.SYS.KEY.CONFIG cannot run concurrently on
+ * different cpus. Do it one by one.
+ */
+ ret = smp_call_on_cpu(cpu, do_global_key_config, NULL, true);
+ if (ret)
+ break;
+ }
+
+ free_cpumask_var(packages);
+ return ret;
+}
+
+static int init_tdmr(struct tdmr_info *tdmr)
+{
+ u64 next;
+
+ /*
+ * Initializing a TDMR can be time consuming. To avoid long
+ * SEAMCALLs, the TDX module may only initialize a part of the
+ * TDMR in each call.
+ */
+ do {
+ struct tdx_module_args args = {
+ .rcx = tdmr->base,
+ };
+ int ret;
+
+ ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
+ if (ret)
+ return ret;
+ /*
+ * RDX contains 'next-to-initialize' address if
+ * TDH.SYS.TDMR.INIT did not fully complete and
+ * should be retried.
+ */
+ next = args.rdx;
+ cond_resched();
+ /* Keep making SEAMCALLs until the TDMR is done */
+ } while (next < tdmr->base + tdmr->size);
+
+ return 0;
+}
+
+static int init_tdmrs(struct tdmr_info_list *tdmr_list)
+{
+ int i;
+
+ /*
+ * This operation is costly. It can be parallelized,
+ * but keep it simple for now.
+ */
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+ int ret;
+
+ ret = init_tdmr(tdmr_entry(tdmr_list, i));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
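+/*
+ * Do the module-wide initialization: build the TDX memory regions,
+ * construct and configure TDMRs to cover them, configure the global
+ * KeyID on all packages, and initialize all TDMRs.
+ */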
+static int init_tdx_module(void)
+{
+ struct tdx_tdmr_sysinfo tdmr_sysinfo;
+ int ret;
+
+ /*
+ * To keep things simple, assume that all TDX-protected memory
+ * will come from the page allocator. Make sure all pages in the
+ * page allocator are TDX-usable memory.
+ *
+ * Build the list of "TDX-usable" memory regions which cover all
+ * pages in the page allocator to guarantee that. Do it while
+ * holding mem_hotplug_lock read-lock as the memory hotplug code
+ * path reads the @tdx_memlist to reject any new memory.
+ */
+ get_online_mems();
+
+ ret = build_tdx_memlist(&tdx_memlist);
+ if (ret)
+ goto out_put_tdxmem;
+
+ ret = get_tdx_tdmr_sysinfo(&tdmr_sysinfo);
+ if (ret)
+ goto err_free_tdxmem;
+
+ /* Allocate enough space for constructing TDMRs */
+ ret = alloc_tdmr_list(&tdx_tdmr_list, &tdmr_sysinfo);
+ if (ret)
+ goto err_free_tdxmem;
+
+ /* Cover all TDX-usable memory regions in TDMRs */
+ ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdmr_sysinfo);
+ if (ret)
+ goto err_free_tdmrs;
+
+ /* Pass the TDMRs and the global KeyID to the TDX module */
+ ret = config_tdx_module(&tdx_tdmr_list, tdx_global_keyid);
+ if (ret)
+ goto err_free_pamts;
+
+ /* Config the key of global KeyID on all packages */
+ ret = config_global_keyid();
+ if (ret)
+ goto err_reset_pamts;
+
+ /* Initialize TDMRs to complete the TDX module initialization */
+ ret = init_tdmrs(&tdx_tdmr_list);
+ if (ret)
+ goto err_reset_pamts;
+
+ pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));
+
+out_put_tdxmem:
+ /*
+ * @tdx_memlist was built above and is read at memory hotplug
+ * time. The mem_hotplug_lock read-lock was held while building
+ * it to lock out the memory hotplug code.
+ */
+ put_online_mems();
+ return ret;
+
+err_reset_pamts:
+ /*
+ * Part of the PAMTs may already have been initialized by the
+ * TDX module. Flush the cache before returning the PAMTs back
+ * to the kernel.
+ */
+ wbinvd_on_all_cpus();
+ /*
+ * According to the TDX hardware spec, if the platform
+ * doesn't have the "partial write machine check"
+ * erratum, kernel reads/writes will never cause #MC
+ * in kernel space, thus it's OK not to convert PAMTs
+ * back to normal. But do the conversion anyway here,
+ * as suggested by the TDX spec.
+ */
+ tdmrs_reset_pamt_all(&tdx_tdmr_list);
+err_free_pamts:
+ tdmrs_free_pamt_all(&tdx_tdmr_list);
+err_free_tdmrs:
+ free_tdmr_list(&tdx_tdmr_list);
+err_free_tdxmem:
+ free_tdx_memlist(&tdx_memlist);
+ goto out_put_tdxmem;
+}
+
+static int __tdx_enable(void)
+{
+ int ret;
+
+ ret = init_tdx_module();
+ if (ret) {
+ pr_err("module initialization failed (%d)\n", ret);
+ tdx_module_status = TDX_MODULE_ERROR;
+ return ret;
+ }
+
+ pr_info("module initialized\n");
+ tdx_module_status = TDX_MODULE_INITIALIZED;
+
+ return 0;
+}
+
+/**
+ * tdx_enable - Enable the TDX module to make it ready to run TDX guests
+ *
+ * This function assumes the caller has: 1) held the read lock of the CPU
+ * hotplug lock to prevent any new CPU from becoming online; 2) done both
+ * VMXON and tdx_cpu_enable() on all online CPUs.
+ *
+ * This function requires at least one online CPU for each CPU package
+ * in order to succeed.
+ *
+ * This function can be called in parallel by multiple callers.
+ *
+ * Return 0 if TDX is enabled successfully, otherwise an error.
+ */
+int tdx_enable(void)
+{
+ int ret;
+
+ if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
+ return -ENODEV;
+
+ lockdep_assert_cpus_held();
+
+ mutex_lock(&tdx_module_lock);
+
+ switch (tdx_module_status) {
+ case TDX_MODULE_UNINITIALIZED:
+ ret = __tdx_enable();
+ break;
+ case TDX_MODULE_INITIALIZED:
+ /* Already initialized, great, tell the caller. */
+ ret = 0;
+ break;
+ default:
+ /* Failed to initialize in the previous attempts */
+ ret = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&tdx_module_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tdx_enable);
+
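+/* Return whether the physical address falls within any TDMR's PAMT area */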
+static bool is_pamt_page(unsigned long phys)
+{
+ struct tdmr_info_list *tdmr_list = &tdx_tdmr_list;
+ int i;
+
+ /* Ensure that all remote 'tdmr_list' writes are visible: */
+ smp_rmb();
+
+ /*
+ * The TDX module is no longer returning TDX_SYS_NOT_READY and
+ * is initialized. The 'tdmr_list' was initialized long ago
+ * and is now read-only.
+ */
+ for (i = 0; i < tdmr_list->nr_consumed_tdmrs; i++) {
+ unsigned long base, size;
+
+ tdmr_get_pamt(tdmr_entry(tdmr_list, i), &base, &size);
+
+ if (phys >= base && phys < (base + size))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Return whether the memory page at the given physical address is TDX
+ * private memory or not.
+ *
+ * This can be imprecise for two known reasons:
+ * 1. PAMTs are private memory and exist before the TDX module is
+ * ready and TDH_PHYMEM_PAGE_RDMD works. This is a relatively
+ * short window that occurs once per boot.
+ * 2. TDH_PHYMEM_PAGE_RDMD reflects the TDX module's knowledge of the
+ * page. However, the page can still cause #MC until it has been
+ * fully converted to shared using 64-byte writes like MOVDIR64B.
+ * Buggy hosts might still leave #MC-causing memory in place which
+ * this function can not detect.
+ */
+static bool paddr_is_tdx_private(unsigned long phys)
+{
+ struct tdx_module_args args = {
+ .rcx = phys & PAGE_MASK,
+ };
+ u64 sret;
+
+ if (!boot_cpu_has(X86_FEATURE_TDX_HOST_PLATFORM))
+ return false;
+
+ /* Get page type from the TDX module */
+ sret = __seamcall_ret(TDH_PHYMEM_PAGE_RDMD, &args);
+
+ /*
+ * The SEAMCALL will not return success unless there is a
+ * working, "ready" TDX module. Assume an absence of TDX
+ * private pages until SEAMCALL is working.
+ */
+ if (sret)
+ return false;
+
+ /*
+ * SEAMCALL was successful -- read page type (via RCX):
+ *
+ * - PT_NDA: Page is not used by the TDX module
+ * - PT_RSVD: Reserved for Non-TDX use
+ * - Others: Page is used by the TDX module
+ *
+ * Note PAMT pages are marked as PT_RSVD but they are also TDX
+ * private memory.
+ */
+ switch (args.rcx) {
+ case PT_NDA:
+ return false;
+ case PT_RSVD:
+ return is_pamt_page(phys);
+ default:
+ return true;
+ }
+}
+
+/*
+ * Some TDX-capable CPUs have an erratum. A write to TDX private
+ * memory poisons that memory, and a subsequent read of that memory
+ * triggers #MC.
+ *
+ * Help distinguish erratum-triggered #MCs from normal hardware ones.
+ * Just print an additional message to show that such an #MC may be a
+ * result of the erratum.
+ */
+const char *tdx_dump_mce_info(struct mce *m)
+{
+ if (!m || !mce_is_memory_error(m) || !mce_usable_address(m))
+ return NULL;
+
+ if (!paddr_is_tdx_private(m->addr))
+ return NULL;
+
+ return "TDX private memory error. Possible kernel bug.";
+}
+
+static __init int record_keyid_partitioning(u32 *tdx_keyid_start,
+ u32 *nr_tdx_keyids)
+{
+ u32 _nr_mktme_keyids, _tdx_keyid_start, _nr_tdx_keyids;
+ int ret;
+
+ /*
+ * IA32_MKTME_KEYID_PARTITIONING:
+ * Bits [31:0]: Number of MKTME KeyIDs.
+ * Bits [63:32]: Number of TDX private KeyIDs.
+ */
+ ret = rdmsr_safe(MSR_IA32_MKTME_KEYID_PARTITIONING, &_nr_mktme_keyids,
+ &_nr_tdx_keyids);
+ if (ret || !_nr_tdx_keyids)
+ return -EINVAL;
+
+ /* TDX KeyIDs start after the last MKTME KeyID. */
+ _tdx_keyid_start = _nr_mktme_keyids + 1;
+
+ *tdx_keyid_start = _tdx_keyid_start;
+ *nr_tdx_keyids = _nr_tdx_keyids;
+
+ return 0;
+}
+
+static bool is_tdx_memory(unsigned long start_pfn, unsigned long end_pfn)
+{
+ struct tdx_memblock *tmb;
+
+ /*
+ * This check assumes that the start_pfn<->end_pfn range does not
+ * cross multiple @tdx_memlist entries. A single memory online
+ * event across multiple memblocks (from which @tdx_memlist
+ * entries are derived at the time of module initialization) is
+ * not possible. This is because memory offline/online is done
+ * at the granularity of 'struct memory_block', and a hotpluggable
+ * memory region (one memblock) must be a multiple of memory_block.
+ */
+ list_for_each_entry(tmb, &tdx_memlist, list) {
+ if (start_pfn >= tmb->start_pfn && end_pfn <= tmb->end_pfn)
+ return true;
+ }
+ return false;
+}
+
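+/*
+ * Memory hotplug notifier: reject onlining any memory range that is not
+ * covered by the TDX memory configuration built at module initialization.
+ */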
+static int tdx_memory_notifier(struct notifier_block *nb, unsigned long action,
+ void *v)
+{
+ struct memory_notify *mn = v;
+
+ if (action != MEM_GOING_ONLINE)
+ return NOTIFY_OK;
+
+ /*
+ * An empty list means TDX isn't enabled. Allow any memory
+ * to go online.
+ */
+ if (list_empty(&tdx_memlist))
+ return NOTIFY_OK;
+
+ /*
+ * The TDX memory configuration is static and cannot be
+ * changed. Reject onlining any memory which is outside of
+ * the static configuration, whether it supports TDX or not.
+ */
+ if (is_tdx_memory(mn->start_pfn, mn->start_pfn + mn->nr_pages))
+ return NOTIFY_OK;
+
+ return NOTIFY_BAD;
+}
+
+static struct notifier_block tdx_memory_nb = {
+ .notifier_call = tdx_memory_notifier,
+};
+
+static void __init check_tdx_erratum(void)
+{
+ /*
+ * These CPUs have an erratum. A partial write from non-TD
+ * software (e.g. via MOVNTI variants or UC/WC mapping) to TDX
+ * private memory poisons that memory, and a subsequent read of
+ * that memory triggers #MC.
+ */
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
+ setup_force_cpu_bug(X86_BUG_TDX_PW_MCE);
+ }
+}
+
+void __init tdx_init(void)
+{
+ u32 tdx_keyid_start, nr_tdx_keyids;
+ int err;
+
+ err = record_keyid_partitioning(&tdx_keyid_start, &nr_tdx_keyids);
+ if (err)
+ return;
+
+ pr_info("BIOS enabled: private KeyID range [%u, %u)\n",
+ tdx_keyid_start, tdx_keyid_start + nr_tdx_keyids);
+
+ /*
+ * The TDX module itself requires one 'global KeyID' to protect
+ * its metadata. If there's only one TDX KeyID, there won't be
+ * any left for TDX guests, thus there's no point in enabling
+ * TDX at all.
+ */
+ if (nr_tdx_keyids < 2) {
+ pr_err("initialization failed: too few private KeyIDs available.\n");
+ return;
+ }
+
+ /*
+ * At this point, hibernation_available() indicates whether or
+ * not hibernation support has been permanently disabled.
+ */
+ if (hibernation_available()) {
+ pr_err("initialization failed: Hibernation support is enabled\n");
+ return;
+ }
+
+ err = register_memory_notifier(&tdx_memory_nb);
+ if (err) {
+ pr_err("initialization failed: register_memory_notifier() failed (%d)\n",
+ err);
+ return;
+ }
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+ pr_info("Disable ACPI S3. Turn off TDX in the BIOS to use ACPI S3.\n");
+ acpi_suspend_lowlevel = NULL;
+#endif
+
+ /*
+ * Just use the first TDX KeyID as the 'global KeyID' and
+ * leave the rest for TDX guests.
+ */
+ tdx_global_keyid = tdx_keyid_start;
+ tdx_guest_keyid_start = tdx_keyid_start + 1;
+ tdx_nr_guest_keyids = nr_tdx_keyids - 1;
+
+ setup_force_cpu_cap(X86_FEATURE_TDX_HOST_PLATFORM);
+
+ check_tdx_erratum();
+}
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
new file mode 100644
index 000000000000..b701f69485d3
--- /dev/null
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_VIRT_TDX_H
+#define _X86_VIRT_TDX_H
+
+#include <linux/bits.h>
+
+/*
+ * This file contains both macros and data structures defined by the TDX
+ * architecture and Linux-defined software data structures and functions.
+ * The two should not be mixed together for better readability. The
+ * architectural definitions come first.
+ */
+
+/*
+ * TDX module SEAMCALL leaf functions
+ */
+#define TDH_PHYMEM_PAGE_RDMD 24
+#define TDH_SYS_KEY_CONFIG 31
+#define TDH_SYS_INIT 33
+#define TDH_SYS_RD 34
+#define TDH_SYS_LP_INIT 35
+#define TDH_SYS_TDMR_INIT 36
+#define TDH_SYS_CONFIG 45
+
+/* TDX page types */
+#define PT_NDA 0x0
+#define PT_RSVD 0x1
+
+/*
+ * Global scope metadata field ID.
+ *
+ * See Table "Global Scope Metadata", TDX module 1.5 ABI spec.
+ */
+#define MD_FIELD_ID_MAX_TDMRS 0x9100000100000008ULL
+#define MD_FIELD_ID_MAX_RESERVED_PER_TDMR 0x9100000100000009ULL
+#define MD_FIELD_ID_PAMT_4K_ENTRY_SIZE 0x9100000100000010ULL
+#define MD_FIELD_ID_PAMT_2M_ENTRY_SIZE 0x9100000100000011ULL
+#define MD_FIELD_ID_PAMT_1G_ENTRY_SIZE 0x9100000100000012ULL
+
+/*
+ * Sub-field definition of metadata field ID.
+ *
+ * See Table "MD_FIELD_ID (Metadata Field Identifier / Sequence Header)
+ * Definition", TDX module 1.5 ABI spec.
+ *
+ * - Bits 33:32: ELEMENT_SIZE_CODE -- size of a single element of metadata
+ *
+ * 0: 8 bits
+ * 1: 16 bits
+ * 2: 32 bits
+ * 3: 64 bits
+ */
+#define MD_FIELD_ID_ELE_SIZE_CODE(_field_id) \
+ (((_field_id) & GENMASK_ULL(33, 32)) >> 32)
+
+#define MD_FIELD_ID_ELE_SIZE_16BIT 1
+
+struct tdmr_reserved_area {
+ u64 offset;
+ u64 size;
+} __packed;
+
+#define TDMR_INFO_ALIGNMENT 512
+#define TDMR_INFO_PA_ARRAY_ALIGNMENT 512
+
+struct tdmr_info {
+ u64 base;
+ u64 size;
+ u64 pamt_1g_base;
+ u64 pamt_1g_size;
+ u64 pamt_2m_base;
+ u64 pamt_2m_size;
+ u64 pamt_4k_base;
+ u64 pamt_4k_size;
+ /*
+ * The actual number of reserved areas depends on the value of
+ * field MD_FIELD_ID_MAX_RESERVED_PER_TDMR in the TDX module
+ * global metadata.
+ */
+ DECLARE_FLEX_ARRAY(struct tdmr_reserved_area, reserved_areas);
+} __packed __aligned(TDMR_INFO_ALIGNMENT);
+
+/*
+ * Do not put any hardware-defined TDX structure representations below
+ * this comment!
+ */
+
+/* Kernel defined TDX module status during module initialization. */
+enum tdx_module_status_t {
+ TDX_MODULE_UNINITIALIZED,
+ TDX_MODULE_INITIALIZED,
+ TDX_MODULE_ERROR
+};
+
+struct tdx_memblock {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+ int nid;
+};
+
+/* "TDMR info" part of "Global Scope Metadata" for constructing TDMRs */
+struct tdx_tdmr_sysinfo {
+ u16 max_tdmrs;
+ u16 max_reserved_per_tdmr;
+ u16 pamt_entry_size[TDX_PS_NR];
+};
+
+/* Warn if kernel has less than TDMR_NR_WARN TDMRs after allocation */
+#define TDMR_NR_WARN 4
+
+struct tdmr_info_list {
+ void *tdmrs; /* Flexible array to hold 'tdmr_info's */
+ int nr_consumed_tdmrs; /* How many 'tdmr_info's are in use */
+
+ /* Metadata for finding target 'tdmr_info' and freeing @tdmrs */
+ int tdmr_sz; /* Size of one 'tdmr_info' */
+ int max_tdmrs; /* How many 'tdmr_info's are allocated */
+};
+
+#endif
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e031eaf36c99..6f248d87e496 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -144,7 +144,7 @@ config XTENSA_VARIANT_CUSTOM_NAME
depends on XTENSA_VARIANT_CUSTOM
help
Provide the name of a custom Xtensa processor variant.
- This CORENAME selects arch/xtensa/variant/CORENAME.
+ This CORENAME selects arch/xtensa/variants/CORENAME.
Don't forget you have to select MMU if you have one.
config XTENSA_VARIANT_NAME
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index bfd8e433ed62..4c14a02179eb 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -35,15 +35,19 @@ KBUILD_CFLAGS += -ffreestanding -D__linux__
KBUILD_CFLAGS += -pipe -mlongcalls -mtext-section-literals
KBUILD_CFLAGS += $(call cc-option,-mforce-no-pic,)
KBUILD_CFLAGS += $(call cc-option,-mno-serialize-volatile,)
+KBUILD_CFLAGS += $(call cc-option,-mno-fdpic,)
ifneq ($(CONFIG_KERNEL_ABI_CALL0),)
KBUILD_CFLAGS += -mabi=call0
KBUILD_AFLAGS += -mabi=call0
endif
KBUILD_AFLAGS += -mlongcalls -mtext-section-literals
+KBUILD_AFLAGS += $(call cc-option,-mno-fdpic,)
+
+KBUILD_LDFLAGS += -m elf32xtensa
ifneq ($(CONFIG_LD_NO_RELAX),)
-KBUILD_LDFLAGS := --no-relax
+KBUILD_LDFLAGS += --no-relax
endif
CHECKFLAGS += -D$(if $(CONFIG_CPU_BIG_ENDIAN),__XTENSA_EB__,__XTENSA_EL__)
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h
index 01bf7d9dbb19..a52d49a16ce7 100644
--- a/arch/xtensa/include/asm/asmmacro.h
+++ b/arch/xtensa/include/asm/asmmacro.h
@@ -11,7 +11,7 @@
#ifndef _XTENSA_ASMMACRO_H
#define _XTENSA_ASMMACRO_H
-#include <asm-generic/export.h>
+#include <linux/export.h>
#include <asm/core.h>
/*
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index 785a00ce83c1..38bcecb0e457 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -116,8 +116,9 @@ void flush_cache_page(struct vm_area_struct*,
#define flush_cache_mm(mm) flush_cache_all()
#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
-#define flush_cache_vmap(start,end) flush_cache_all()
-#define flush_cache_vunmap(start,end) flush_cache_all()
+#define flush_cache_vmap(start,end) flush_cache_all()
+#define flush_cache_vmap_early(start,end) do { } while (0)
+#define flush_cache_vunmap(start,end) flush_cache_all()
void flush_dcache_folio(struct folio *folio);
#define flush_dcache_folio flush_dcache_folio
@@ -140,6 +141,7 @@ void local_flush_cache_page(struct vm_area_struct *vma,
#define flush_cache_dup_mm(mm) do { } while (0)
#define flush_cache_vmap(start,end) do { } while (0)
+#define flush_cache_vmap_early(start,end) do { } while (0)
#define flush_cache_vunmap(start,end) do { } while (0)
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
diff --git a/arch/xtensa/lib/pci-auto.c b/arch/xtensa/lib/pci-auto.c
index aa6752237985..05fc02f9e1c7 100644
--- a/arch/xtensa/lib/pci-auto.c
+++ b/arch/xtensa/lib/pci-auto.c
@@ -11,6 +11,7 @@
* Based on work from Matt Porter <mporter@mvista.com>
*/
+#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/pci.h>
@@ -222,10 +223,11 @@ pciauto_postscan_setup_bridge(struct pci_dev *dev, int current_bus, int sub_bus,
int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus)
{
- int sub_bus, pci_devfn, pci_class, cmdstat, found_multi=0;
+ int sub_bus, pci_devfn, pci_class, cmdstat;
unsigned short vid;
unsigned char header_type;
struct pci_dev *dev = &pciauto_dev;
+ bool found_multi = false;
pciauto_dev.bus = &pciauto_bus;
pciauto_dev.sysdata = pci_ctrl;
@@ -261,11 +263,11 @@ int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus)
continue;
if (!PCI_FUNC(pci_devfn))
- found_multi = header_type & 0x80;
+ found_multi = FIELD_GET(PCI_HEADER_TYPE_MFD, header_type);
pci_read_config_word(dev, PCI_VENDOR_ID, &vid);
if (vid == 0xffff || vid == 0x0000) {
- found_multi = 0;
+ found_multi = false;
continue;
}
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 7d1f8b398a46..8896e691c051 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -65,7 +65,7 @@ static void rs_poll(struct timer_list *unused)
struct tty_port *port = &serial_port;
int i = 0;
int rd = 1;
- unsigned char c;
+ u8 c;
while (simc_poll(0)) {
rd = simc_read(0, &c, 1);