summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig23
-rw-r--r--arch/arm/boot/dts/dra7-l4.dtsi4
-rw-r--r--arch/arm/boot/dts/dra7.dtsi20
-rw-r--r--arch/arm/crypto/aes-cipher-core.S42
-rw-r--r--arch/arm/crypto/blake2b-neon-glue.c4
-rw-r--r--arch/arm/crypto/blake2s-core.S21
-rw-r--r--arch/arm/crypto/chacha-scalar-core.S43
-rw-r--r--arch/arm/crypto/curve25519-core.S2
-rw-r--r--arch/arm/crypto/poly1305-glue.c2
-rw-r--r--arch/arm/include/asm/paravirt.h14
-rw-r--r--arch/arm/kernel/paravirt.c9
-rw-r--r--arch/arm/mach-pxa/pxa_cplds_irqs.c24
-rw-r--r--arch/arm64/Kconfig42
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts2
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts2
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi2
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186.dtsi2
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts3
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi4
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi4
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi1
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/crypto/aes-modes.S3
-rw-r--r--arch/arm64/crypto/poly1305-glue.c2
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S2
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S2
-rw-r--r--arch/arm64/crypto/sha3-ce-core.S4
-rw-r--r--arch/arm64/crypto/sha512-ce-core.S2
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h2
-rw-r--r--arch/arm64/include/asm/arch_timer.h21
-rw-r--r--arch/arm64/include/asm/asm_pointer_auth.h20
-rw-r--r--arch/arm64/include/asm/assembler.h114
-rw-r--r--arch/arm64/include/asm/barrier.h23
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h17
-rw-r--r--arch/arm64/include/asm/daifflags.h10
-rw-r--r--arch/arm64/include/asm/el2_setup.h21
-rw-r--r--arch/arm64/include/asm/fpsimd.h1
-rw-r--r--arch/arm64/include/asm/irq.h4
-rw-r--r--arch/arm64/include/asm/irq_work.h2
-rw-r--r--arch/arm64/include/asm/irqflags.h16
-rw-r--r--arch/arm64/include/asm/memory.h4
-rw-r--r--arch/arm64/include/asm/mte-kasan.h9
-rw-r--r--arch/arm64/include/asm/mte.h54
-rw-r--r--arch/arm64/include/asm/paravirt.h14
-rw-r--r--arch/arm64/include/asm/pgalloc.h19
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h15
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h5
-rw-r--r--arch/arm64/include/asm/pgtable.h31
-rw-r--r--arch/arm64/include/asm/pointer_auth.h61
-rw-r--r--arch/arm64/include/asm/processor.h13
-rw-r--r--arch/arm64/include/asm/ptdump.h2
-rw-r--r--arch/arm64/include/asm/smp.h1
-rw-r--r--arch/arm64/include/asm/stacktrace.h24
-rw-r--r--arch/arm64/include/asm/sysreg.h13
-rw-r--r--arch/arm64/include/asm/uaccess.h22
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h6
-rw-r--r--arch/arm64/include/asm/word-at-a-time.h4
-rw-r--r--arch/arm64/kernel/Makefile5
-rw-r--r--arch/arm64/kernel/asm-offsets.c7
-rw-r--r--arch/arm64/kernel/cpufeature.c22
-rw-r--r--arch/arm64/kernel/entry-common.c6
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S5
-rw-r--r--arch/arm64/kernel/entry.S174
-rw-r--r--arch/arm64/kernel/fpsimd.c39
-rw-r--r--arch/arm64/kernel/head.S39
-rw-r--r--arch/arm64/kernel/hyp-stub.S10
-rw-r--r--arch/arm64/kernel/idreg-override.c26
-rw-r--r--arch/arm64/kernel/irq.c35
-rw-r--r--arch/arm64/kernel/kaslr.c18
-rw-r--r--arch/arm64/kernel/module.c16
-rw-r--r--arch/arm64/kernel/mte.c121
-rw-r--r--arch/arm64/kernel/paravirt.c13
-rw-r--r--arch/arm64/kernel/perf_event.c5
-rw-r--r--arch/arm64/kernel/pointer_auth.c63
-rw-r--r--arch/arm64/kernel/probes/kprobes.c3
-rw-r--r--arch/arm64/kernel/process.c35
-rw-r--r--arch/arm64/kernel/ptrace.c41
-rw-r--r--arch/arm64/kernel/smp.c1
-rw-r--r--arch/arm64/kernel/stacktrace.c24
-rw-r--r--arch/arm64/kernel/suspend.c6
-rw-r--r--arch/arm64/kernel/syscall.c16
-rw-r--r--arch/arm64/kernel/vdso.c26
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c4
-rw-r--r--arch/arm64/mm/fault.c18
-rw-r--r--arch/arm64/mm/kasan_init.c29
-rw-r--r--arch/arm64/mm/mmu.c41
-rw-r--r--arch/arm64/mm/proc.S48
-rw-r--r--arch/arm64/mm/ptdump.c4
-rw-r--r--arch/arm64/mm/ptdump_debugfs.c2
-rw-r--r--arch/m68k/configs/amiga_defconfig5
-rw-r--r--arch/m68k/configs/apollo_defconfig5
-rw-r--r--arch/m68k/configs/atari_defconfig5
-rw-r--r--arch/m68k/configs/bvme6000_defconfig5
-rw-r--r--arch/m68k/configs/hp300_defconfig5
-rw-r--r--arch/m68k/configs/mac_defconfig5
-rw-r--r--arch/m68k/configs/multi_defconfig5
-rw-r--r--arch/m68k/configs/mvme147_defconfig5
-rw-r--r--arch/m68k/configs/mvme16x_defconfig5
-rw-r--r--arch/m68k/configs/q40_defconfig5
-rw-r--r--arch/m68k/configs/sun3_defconfig5
-rw-r--r--arch/m68k/configs/sun3x_defconfig5
-rw-r--r--arch/m68k/fpsp040/Makefile4
-rw-r--r--arch/m68k/ifpsp060/Makefile2
-rw-r--r--arch/m68k/include/asm/mvme147hw.h3
-rw-r--r--arch/m68k/include/asm/sun3xflop.h2
-rw-r--r--arch/m68k/kernel/sys_m68k.c2
-rw-r--r--arch/m68k/kernel/syscalls/Makefile14
-rw-r--r--arch/m68k/kernel/syscalls/syscallhdr.sh36
-rw-r--r--arch/m68k/kernel/syscalls/syscalltbl.sh32
-rw-r--r--arch/m68k/kernel/syscalltable.S3
-rw-r--r--arch/m68k/mvme147/config.c14
-rw-r--r--arch/m68k/mvme16x/config.c14
-rw-r--r--arch/mips/crypto/poly1305-glue.c2
-rw-r--r--arch/mips/netlogic/common/irq.c6
-rw-r--r--arch/powerpc/crypto/sha1-spe-glue.c2
-rw-r--r--arch/s390/kernel/stacktrace.c6
-rw-r--r--arch/x86/Kconfig7
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S2
-rw-r--r--arch/x86/boot/compressed/head_64.S172
-rw-r--r--arch/x86/boot/compressed/idt_64.c14
-rw-r--r--arch/x86/boot/compressed/kaslr.c4
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S130
-rw-r--r--arch/x86/boot/compressed/misc.c9
-rw-r--r--arch/x86/boot/compressed/misc.h6
-rw-r--r--arch/x86/boot/compressed/sev-es.c12
-rw-r--r--arch/x86/crypto/crc32-pclmul_glue.c2
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c6
-rw-r--r--arch/x86/crypto/poly1305_glue.c6
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64-3way.S2
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c2
-rw-r--r--arch/x86/entry/common.c3
-rw-r--r--arch/x86/entry/entry_32.S8
-rw-r--r--arch/x86/entry/entry_64.S4
-rw-r--r--arch/x86/entry/vdso/vdso2c.c2
-rw-r--r--arch/x86/entry/vdso/vdso2c.h2
-rw-r--r--arch/x86/entry/vdso/vdso32/system_call.S4
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/entry/vdso/vsgx.S2
-rw-r--r--arch/x86/events/amd/core.c2
-rw-r--r--arch/x86/events/amd/iommu.h2
-rw-r--r--arch/x86/events/core.c2
-rw-r--r--arch/x86/events/intel/bts.c2
-rw-r--r--arch/x86/events/intel/core.c16
-rw-r--r--arch/x86/events/intel/ds.c2
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/p4.c6
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c73
-rw-r--r--arch/x86/events/zhaoxin/core.c2
-rw-r--r--arch/x86/hyperv/hv_init.c4
-rw-r--r--arch/x86/include/asm/agp.h2
-rw-r--r--arch/x86/include/asm/alternative-asm.h114
-rw-r--r--arch/x86/include/asm/alternative.h142
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/cpu.h7
-rw-r--r--arch/x86/include/asm/cpufeature.h41
-rw-r--r--arch/x86/include/asm/cpufeatures.h5
-rw-r--r--arch/x86/include/asm/elf.h10
-rw-r--r--arch/x86/include/asm/entry-common.h16
-rw-r--r--arch/x86/include/asm/idtentry.h2
-rw-r--r--arch/x86/include/asm/intel_pconfig.h2
-rw-r--r--arch/x86/include/asm/intel_pt.h2
-rw-r--r--arch/x86/include/asm/io.h2
-rw-r--r--arch/x86/include/asm/irq_stack.h2
-rw-r--r--arch/x86/include/asm/irqflags.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h3
-rw-r--r--arch/x86/include/asm/paravirt.h167
-rw-r--r--arch/x86/include/asm/paravirt_types.h212
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/processor.h12
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/set_memory.h4
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/sgx.h (renamed from arch/x86/kernel/cpu/sgx/arch.h)52
-rw-r--r--arch/x86/include/asm/smap.h5
-rw-r--r--arch/x86/include/asm/switch_to.h7
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h1
-rw-r--r--arch/x86/include/asm/thread_info.h8
-rw-r--r--arch/x86/include/asm/uv/uv_geo.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h2
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h4
-rw-r--r--arch/x86/include/uapi/asm/debugreg.h1
-rw-r--r--arch/x86/include/uapi/asm/msgbuf.h2
-rw-r--r--arch/x86/include/uapi/asm/sgx.h2
-rw-r--r--arch/x86/include/uapi/asm/shmbuf.h2
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h2
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/alternative.c52
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/apic.c10
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/apic/vector.c17
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c33
-rw-r--r--arch/x86/kernel/apm_32.c6
-rw-r--r--arch/x86/kernel/asm-offsets.c7
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/common.c8
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c3
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/feat_ctl.c71
-rw-r--r--arch/x86/kernel/cpu/intel.c113
-rw-r--r--arch/x86/kernel/cpu/mce/core.c2
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c6
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c8
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c6
-rw-r--r--arch/x86/kernel/cpu/resctrl/pseudo_lock.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c6
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/sgx/Makefile1
-rw-r--r--arch/x86/kernel/cpu/sgx/driver.c17
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c33
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.h1
-rw-r--r--arch/x86/kernel/cpu/sgx/encls.h30
-rw-r--r--arch/x86/kernel/cpu/sgx/ioctl.c43
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c268
-rw-r--r--arch/x86/kernel/cpu/sgx/sgx.h40
-rw-r--r--arch/x86/kernel/cpu/sgx/virt.c376
-rw-r--r--arch/x86/kernel/cpu/topology.c4
-rw-r--r--arch/x86/kernel/cpu/vmware.c7
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/fpu/xstate.c2
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/idt.c2
-rw-r--r--arch/x86/kernel/irq.c2
-rw-r--r--arch/x86/kernel/kgdb.c4
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c2
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c9
-rw-r--r--arch/x86/kernel/paravirt.c75
-rw-r--r--arch/x86/kernel/paravirt_patch.c99
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/pvclock.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S2
-rw-r--r--arch/x86/kernel/setup.c115
-rw-r--r--arch/x86/kernel/sev-es-shared.c16
-rw-r--r--arch/x86/kernel/sev-es.c38
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/smp.c4
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/stacktrace.c6
-rw-r--r--arch/x86/kernel/sysfb_efi.c2
-rw-r--r--arch/x86/kernel/tboot.c44
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/tsc.c9
-rw-r--r--arch/x86/kernel/tsc_sync.c2
-rw-r--r--arch/x86/kernel/umip.c2
-rw-r--r--arch/x86/kvm/Kconfig12
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/irq_comm.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c6
-rw-r--r--arch/x86/kvm/pmu.h2
-rw-r--r--arch/x86/kvm/svm/avic.c4
-rw-r--r--arch/x86/kvm/svm/sev.c2
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/nested.c2
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c2
-rw-r--r--arch/x86/kvm/vmx/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c34
-rw-r--r--arch/x86/lib/atomic64_386_32.S2
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S2
-rw-r--r--arch/x86/lib/copy_page_64.S2
-rw-r--r--arch/x86/lib/copy_user_64.S2
-rw-r--r--arch/x86/lib/insn-eval.c6
-rw-r--r--arch/x86/lib/memcpy_64.S2
-rw-r--r--arch/x86/lib/memmove_64.S2
-rw-r--r--arch/x86/lib/memset_64.S2
-rw-r--r--arch/x86/lib/mmx_32.c2
-rw-r--r--arch/x86/lib/msr-smp.c4
-rw-r--r--arch/x86/lib/msr.c4
-rw-r--r--arch/x86/lib/retpoline.S2
-rw-r--r--arch/x86/math-emu/fpu_trig.c11
-rw-r--r--arch/x86/math-emu/reg_ld_str.c2
-rw-r--r--arch/x86/math-emu/reg_round.S2
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/init.c6
-rw-r--r--arch/x86/mm/init_64.c6
-rw-r--r--arch/x86/mm/kaslr.c2
-rw-r--r--arch/x86/mm/kmmio.c2
-rw-r--r--arch/x86/mm/mem_encrypt.c6
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S2
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c35
-rw-r--r--arch/x86/mm/pat/memtype.c4
-rw-r--r--arch/x86/mm/pat/set_memory.c2
-rw-r--r--arch/x86/mm/pkeys.c2
-rw-r--r--arch/x86/mm/pti.c11
-rw-r--r--arch/x86/mm/tlb.c6
-rw-r--r--arch/x86/net/bpf_jit_comp.c4
-rw-r--r--arch/x86/pci/fixup.c2
-rw-r--r--arch/x86/platform/efi/efi_64.c4
-rw-r--r--arch/x86/platform/efi/quirks.c4
-rw-r--r--arch/x86/platform/intel-quark/imr.c4
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c2
-rw-r--r--arch/x86/platform/intel/iosf_mbi.c4
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c2
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c2
-rw-r--r--arch/x86/platform/pvh/head.S6
-rw-r--r--arch/x86/platform/uv/uv_nmi.c48
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/realmode/init.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c4
-rw-r--r--arch/x86/xen/mmu_pv.c2
-rw-r--r--arch/x86/xen/time.c26
323 files changed, 3305 insertions, 2032 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index ecfd3520b676..6b11c825fc36 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1054,6 +1054,29 @@ config VMAP_STACK
backing virtual mappings with real shadow memory, and KASAN_VMALLOC
must be enabled.
+config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+ def_bool n
+ help
+ An arch should select this symbol if it can support kernel stack
+ offset randomization with calls to add_random_kstack_offset()
+ during syscall entry and choose_random_kstack_offset() during
+ syscall exit. Careful removal of -fstack-protector-strong and
+ -fstack-protector should also be applied to the entry code and
+ closely examined, as the artificial stack bump looks like an array
+ to the compiler, so it will attempt to add canary checks regardless
+ of the static branch state.
+
+config RANDOMIZE_KSTACK_OFFSET_DEFAULT
+ bool "Randomize kernel stack offset on syscall entry"
+ depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+ help
+ The kernel stack offset can be randomized (after pt_regs) by
+ roughly 5 bits of entropy, frustrating memory corruption
+ attacks that depend on stack address determinism or
+ cross-syscall address exposures. This feature is controlled
+ by kernel boot param "randomize_kstack_offset=on/off", and this
+ config chooses the default boot state.
+
config ARCH_OPTIONAL_KERNEL_RWX
def_bool n
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 3bf90d9e3335..a294a02f2d23 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -1168,7 +1168,7 @@
};
};
- target-module@34000 { /* 0x48034000, ap 7 46.0 */
+ timer3_target: target-module@34000 { /* 0x48034000, ap 7 46.0 */
compatible = "ti,sysc-omap4-timer", "ti,sysc";
reg = <0x34000 0x4>,
<0x34010 0x4>;
@@ -1195,7 +1195,7 @@
};
};
- target-module@36000 { /* 0x48036000, ap 9 4e.0 */
+ timer4_target: target-module@36000 { /* 0x48036000, ap 9 4e.0 */
compatible = "ti,sysc-omap4-timer", "ti,sysc";
reg = <0x36000 0x4>,
<0x36010 0x4>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index ce1194744f84..53d68786a61f 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -46,6 +46,7 @@
timer {
compatible = "arm,armv7-timer";
+ status = "disabled"; /* See ARM architected timer wrap erratum i940 */
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
@@ -1241,3 +1242,22 @@
assigned-clock-parents = <&sys_32k_ck>;
};
};
+
+/* Local timers, see ARM architected timer wrap erratum i940 */
+&timer3_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+ assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
+ };
+};
+
+&timer4_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+ assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
+ };
+};
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 472e56d09eea..1da3f41359aa 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -99,28 +99,6 @@
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
.endm
- .macro __rev, out, in
- .if __LINUX_ARM_ARCH__ < 6
- lsl t0, \in, #24
- and t1, \in, #0xff00
- and t2, \in, #0xff0000
- orr \out, t0, \in, lsr #24
- orr \out, \out, t1, lsl #8
- orr \out, \out, t2, lsr #8
- .else
- rev \out, \in
- .endif
- .endm
-
- .macro __adrl, out, sym, c
- .if __LINUX_ARM_ARCH__ < 7
- ldr\c \out, =\sym
- .else
- movw\c \out, #:lower16:\sym
- movt\c \out, #:upper16:\sym
- .endif
- .endm
-
.macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
@@ -133,10 +111,10 @@
ldr r7, [in, #12]
#ifdef CONFIG_CPU_BIG_ENDIAN
- __rev r4, r4
- __rev r5, r5
- __rev r6, r6
- __rev r7, r7
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
#endif
eor r4, r4, r8
@@ -144,7 +122,7 @@
eor r6, r6, r10
eor r7, r7, r11
- __adrl ttab, \ttab
+ mov_l ttab, \ttab
/*
* Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
* L1 cache, assuming cacheline size >= 32. This is a hardening measure
@@ -180,7 +158,7 @@
2: .ifb \ltab
add ttab, ttab, #1
.else
- __adrl ttab, \ltab
+ mov_l ttab, \ltab
// Prefetch inverse S-box for final round; see explanation above
.set i, 0
.rept 256 / 64
@@ -194,10 +172,10 @@
\round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
#ifdef CONFIG_CPU_BIG_ENDIAN
- __rev r4, r4
- __rev r5, r5
- __rev r6, r6
- __rev r7, r7
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
#endif
ldr out, [sp]
diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c
index 34d73200e7fa..4b59d027ba4a 100644
--- a/arch/arm/crypto/blake2b-neon-glue.c
+++ b/arch/arm/crypto/blake2b-neon-glue.c
@@ -85,8 +85,8 @@ static int __init blake2b_neon_mod_init(void)
static void __exit blake2b_neon_mod_exit(void)
{
- return crypto_unregister_shashes(blake2b_neon_algs,
- ARRAY_SIZE(blake2b_neon_algs));
+ crypto_unregister_shashes(blake2b_neon_algs,
+ ARRAY_SIZE(blake2b_neon_algs));
}
module_init(blake2b_neon_mod_init);
diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S
index bed897e9a181..86345751bbf3 100644
--- a/arch/arm/crypto/blake2s-core.S
+++ b/arch/arm/crypto/blake2s-core.S
@@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
// Registers used to hold message words temporarily. There aren't
// enough ARM registers to hold the whole message block, so we have to
@@ -38,6 +39,23 @@
#endif
.endm
+.macro _le32_bswap a, tmp
+#ifdef __ARMEB__
+ rev_l \a, \tmp
+#endif
+.endm
+
+.macro _le32_bswap_8x a, b, c, d, e, f, g, h, tmp
+ _le32_bswap \a, \tmp
+ _le32_bswap \b, \tmp
+ _le32_bswap \c, \tmp
+ _le32_bswap \d, \tmp
+ _le32_bswap \e, \tmp
+ _le32_bswap \f, \tmp
+ _le32_bswap \g, \tmp
+ _le32_bswap \h, \tmp
+.endm
+
// Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals.
// (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two
// columns/diagonals. s0-s1 are the word offsets to the message words the first
@@ -180,8 +198,10 @@ ENTRY(blake2s_compress_arch)
tst r1, #3
bne .Lcopy_block_misaligned
ldmia r1!, {r2-r9}
+ _le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14
stmia r12!, {r2-r9}
ldmia r1!, {r2-r9}
+ _le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14
stmia r12, {r2-r9}
.Lcopy_block_done:
str r1, [sp, #68] // Update message pointer
@@ -268,6 +288,7 @@ ENTRY(blake2s_compress_arch)
1:
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
ldr r3, [r1], #4
+ _le32_bswap r3, r4
#else
ldrb r3, [r1, #0]
ldrb r4, [r1, #1]
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
index 2985b80a45b5..083fe1ab96d0 100644
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,32 +41,15 @@
X14 .req r12
X15 .req r14
-.macro __rev out, in, t0, t1, t2
-.if __LINUX_ARM_ARCH__ >= 6
- rev \out, \in
-.else
- lsl \t0, \in, #24
- and \t1, \in, #0xff00
- and \t2, \in, #0xff0000
- orr \out, \t0, \in, lsr #24
- orr \out, \out, \t1, lsl #8
- orr \out, \out, \t2, lsr #8
-.endif
-.endm
-
-.macro _le32_bswap x, t0, t1, t2
+.macro _le32_bswap_4x a, b, c, d, tmp
#ifdef __ARMEB__
- __rev \x, \x, \t0, \t1, \t2
+ rev_l \a, \tmp
+ rev_l \b, \tmp
+ rev_l \c, \tmp
+ rev_l \d, \tmp
#endif
.endm
-.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
- _le32_bswap \a, \t0, \t1, \t2
- _le32_bswap \b, \t0, \t1, \t2
- _le32_bswap \c, \t0, \t1, \t2
- _le32_bswap \d, \t0, \t1, \t2
-.endm
-
.macro __ldrd a, b, src, offset
#if __LINUX_ARM_ARCH__ >= 6
ldrd \a, \b, [\src, #\offset]
@@ -200,7 +183,7 @@
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
- _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ _le32_bswap_4x X0, X1, X2, X3, r8
ldmia r12!, {r8-r11}
eor X0, X0, r8
eor X1, X1, r9
@@ -216,7 +199,7 @@
ldmia r12!, {X0-X3}
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
- _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ _le32_bswap_4x X4, X5, X6, X7, r8
eor X4, X4, X0
eor X5, X5, X1
eor X6, X6, X2
@@ -231,7 +214,7 @@
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
- _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ _le32_bswap_4x r0, r1, r6, r7, r8
ldmia r12!, {r8-r11}
eor r0, r0, r8 // x8
eor r1, r1, r9 // x9
@@ -245,7 +228,7 @@
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
- _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ _le32_bswap_4x r2, r3, r4, r5, r9
ldr r9, [sp, #72] // load LEN
eor r2, r2, r0 // x12
eor r3, r3, r1 // x13
@@ -301,7 +284,7 @@
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
- _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ _le32_bswap_4x X0, X1, X2, X3, r8
stmia r14!, {X0-X3}
// Save keystream for x4-x7
@@ -311,7 +294,7 @@
add X5, r9, X5, ror #brot
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
- _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ _le32_bswap_4x X4, X5, X6, X7, r8
add r8, sp, #64
stmia r14!, {X4-X7}
@@ -323,7 +306,7 @@
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
- _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ _le32_bswap_4x r0, r1, r6, r7, r8
stmia r14!, {r0,r1,r6,r7}
__ldrd r8, r9, sp, 144
__ldrd r10, r11, sp, 152
@@ -331,7 +314,7 @@
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
- _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ _le32_bswap_4x r2, r3, r4, r5, r9
stmia r14, {r2-r5}
// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
index be18af52e7dc..b697fa5d059a 100644
--- a/arch/arm/crypto/curve25519-core.S
+++ b/arch/arm/crypto/curve25519-core.S
@@ -10,8 +10,8 @@
#include <linux/linkage.h>
.text
-.fpu neon
.arch armv7-a
+.fpu neon
.align 4
ENTRY(curve25519_neon)
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
index 3023c1acfa19..c31bd8f7c092 100644
--- a/arch/arm/crypto/poly1305-glue.c
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -29,7 +29,7 @@ void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_arm(&dctx->h, key);
dctx->s[0] = get_unaligned_le32(key + 16);
diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h
index cdbf02d9c1d4..95d5b0d625cd 100644
--- a/arch/arm/include/asm/paravirt.h
+++ b/arch/arm/include/asm/paravirt.h
@@ -3,23 +3,19 @@
#define _ASM_ARM_PARAVIRT_H
#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;
-struct pv_time_ops {
- unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
- struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
static inline u64 paravirt_steal_clock(int cpu)
{
- return pv_ops.time.steal_clock(cpu);
+ return static_call(pv_steal_clock)(cpu);
}
#endif
diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c
index 4cfed91fe256..7dd9806369fb 100644
--- a/arch/arm/kernel/paravirt.c
+++ b/arch/arm/kernel/paravirt.c
@@ -9,10 +9,15 @@
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/types.h>
+#include <linux/static_call.h>
#include <asm/paravirt.h>
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
index 45c19ca96f7a..ec0d9b094744 100644
--- a/arch/arm/mach-pxa/pxa_cplds_irqs.c
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -147,22 +147,20 @@ static int cplds_probe(struct platform_device *pdev)
}
irq_set_irq_wake(fpga->irq, 1);
- fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node,
- CPLDS_NB_IRQ,
- &cplds_irq_domain_ops, fpga);
+ if (base_irq)
+ fpga->irqdomain = irq_domain_add_legacy(pdev->dev.of_node,
+ CPLDS_NB_IRQ,
+ base_irq, 0,
+ &cplds_irq_domain_ops,
+ fpga);
+ else
+ fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node,
+ CPLDS_NB_IRQ,
+ &cplds_irq_domain_ops,
+ fpga);
if (!fpga->irqdomain)
return -ENODEV;
- if (base_irq) {
- ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0,
- CPLDS_NB_IRQ);
- if (ret) {
- dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n",
- base_irq, base_irq + CPLDS_NB_IRQ);
- return ret;
- }
- }
-
return 0;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dfdc3e0af5e1..406b42c05ee1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -108,9 +108,9 @@ config ARM64
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
+ select GENERIC_FIND_FIRST_BIT
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_IPI
- select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
@@ -138,6 +138,7 @@ config ARM64
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
+ select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
select HAVE_ARCH_KFENCE
@@ -146,6 +147,7 @@ config ARM64
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_PFN_VALID
select HAVE_ARCH_PREL32_RELOCATIONS
+ select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
@@ -194,6 +196,7 @@ config ARM64
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
+ select KASAN_VMALLOC if KASAN_GENERIC
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
@@ -1068,6 +1071,9 @@ config SYS_SUPPORTS_HUGETLBFS
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
+config ARCH_HAS_FILTER_PGPROT
+ def_bool y
+
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2
@@ -1429,19 +1435,6 @@ config ARM64_USE_LSE_ATOMICS
built with binutils >= 2.25 in order for the new instructions
to be used.
-config ARM64_VHE
- bool "Enable support for Virtualization Host Extensions (VHE)"
- default y
- help
- Virtualization Host Extensions (VHE) allow the kernel to run
- directly at EL2 (instead of EL1) on processors that support
- it. This leads to better performance for KVM, as they reduce
- the cost of the world switch.
-
- Selecting this option allows the VHE feature to be detected
- at runtime, and does not affect processors that do not
- implement this feature.
-
endmenu
menu "ARMv8.2 architectural features"
@@ -1695,10 +1688,23 @@ config ARM64_MTE
endmenu
+menu "ARMv8.7 architectural features"
+
+config ARM64_EPAN
+ bool "Enable support for Enhanced Privileged Access Never (EPAN)"
+ default y
+ depends on ARM64_PAN
+ help
+ Enhanced Privileged Access Never (EPAN) allows Privileged
+ Access Never to be used with Execute-only mappings.
+
+ The feature is detected at runtime, and will remain disabled
+ if the cpu does not implement the feature.
+endmenu
+
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
- depends on !KVM || ARM64_VHE
help
The Scalable Vector Extension (SVE) is an extension to the AArch64
execution state which complements and extends the SIMD functionality
@@ -1727,12 +1733,6 @@ config ARM64_SVE
booting the kernel. If unsure and you are not observing these
symptoms, you should assume that it is safe to say Y.
- CPUs that support SVE are architecturally required to support the
- Virtualization Host Extensions (VHE), so the kernel makes no
- provision for supporting SVE alongside KVM without VHE enabled.
- Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
- KVM in the same kernel image.
-
config ARM64_MODULE_PLTS
bool "Use PLTs to allow module memory to spill over into vmalloc area"
depends on MODULES
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts
index e79ce49e7e6a..596a25907432 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts
@@ -21,5 +21,5 @@
};
&mmc0 {
- cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 push-push switch */
+ broken-cd; /* card detect is broken on *some* boards */
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
index 9f5f5e1fa82e..683743f81849 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
@@ -10,7 +10,7 @@
model = "NVIDIA Jetson TX2 Developer Kit";
compatible = "nvidia,p2771-0000", "nvidia,tegra186";
- aconnect {
+ aconnect@2900000 {
status = "okay";
dma-controller@2930000 {
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
index fd9177447711..fcd71bfc6707 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi
@@ -23,7 +23,7 @@
};
chosen {
- bootargs = "earlycon console=ttyS0,115200n8";
+ bootargs = "earlycon console=ttyS0,115200n8 fw_devlink=on";
stdout-path = "serial0:115200n8";
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index 02b26b39cedc..9f75bbf00cf7 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -73,7 +73,7 @@
snps,rxpbl = <8>;
};
- aconnect {
+ aconnect@2900000 {
compatible = "nvidia,tegra186-aconnect",
"nvidia,tegra210-aconnect";
clocks = <&bpmp TEGRA186_CLK_APE>,
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
index 2888efc42ba1..d618f197a1d3 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts
@@ -651,6 +651,8 @@
reg = <0x1a>;
interrupt-parent = <&gpio>;
interrupts = <TEGRA194_MAIN_GPIO(S, 5) GPIO_ACTIVE_HIGH>;
+ clocks = <&bpmp TEGRA194_CLK_AUD_MCLK>;
+ clock-names = "mclk";
realtek,jd-src = <2>;
sound-name-prefix = "CVB-RT";
@@ -658,7 +660,6 @@
rt5658_ep: endpoint {
remote-endpoint = <&i2s1_dap_ep>;
mclk-fs = <256>;
- clocks = <&bpmp TEGRA194_CLK_AUD_MCLK>;
};
};
};
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi
index 7da3d48cb410..14da4206ea66 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi
@@ -5,6 +5,10 @@
model = "NVIDIA Jetson Xavier NX (SD-card)";
compatible = "nvidia,p3668-0000", "nvidia,tegra194";
+ aliases {
+ mmc0 = "/bus@0/mmc@3400000";
+ };
+
bus@0 {
/* SDMMC1 (SD/MMC) */
mmc@3400000 {
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi
index b7808648cfe4..f5a9ebbfb12f 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0001.dtsi
@@ -5,6 +5,10 @@
model = "NVIDIA Jetson Xavier NX (eMMC)";
compatible = "nvidia,p3668-0001", "nvidia,tegra194";
+ aliases {
+ mmc0 = "/bus@0/mmc@3460000";
+ };
+
bus@0 {
/* SDMMC4 (eMMC) */
mmc@3460000 {
diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
index 4f12721c332b..f16b0aa8a374 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668.dtsi
@@ -14,7 +14,6 @@
i2c5 = "/bus@0/i2c@31c0000";
i2c6 = "/bus@0/i2c@c250000";
i2c7 = "/bus@0/i2c@31e0000";
- mmc0 = "/bus@0/mmc@3460000";
rtc0 = "/bpmp/i2c/pmic@3c";
rtc1 = "/bus@0/rtc@c2a0000";
serial0 = &tcu;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d612f633b771..8793a9cb9d4b 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1156,6 +1156,7 @@ CONFIG_CRYPTO_DEV_HISI_TRNG=m
CONFIG_CMA_SIZE_MBYTES=32
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index bbdb54702aa7..b495de22bb38 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -359,6 +359,7 @@ ST5( mov v4.16b, vctr.16b )
ins vctr.d[0], x8
/* apply carry to N counter blocks for N := x12 */
+ cbz x12, 2f
adr x16, 1f
sub x16, x16, x12, lsl #3
br x16
@@ -700,7 +701,7 @@ AES_FUNC_START(aes_mac_update)
cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8
st1 {v0.16b}, [x4] /* return dg */
- cond_yield .Lmacout, x7
+ cond_yield .Lmacout, x7, x8
b .Lmacloop4x
.Lmac1x:
add w3, w3, #4
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
index 683de671741a..9c3d86e397bf 100644
--- a/arch/arm64/crypto/poly1305-glue.c
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -25,7 +25,7 @@ asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_arm64(&dctx->h, key);
dctx->s[0] = get_unaligned_le32(key + 16);
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 8c02bbc2684e..889ca0f8972b 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -121,7 +121,7 @@ CPU_LE( rev32 v11.16b, v11.16b )
add dgav.4s, dgav.4s, dg0v.4s
cbz w2, 2f
- cond_yield 3f, x5
+ cond_yield 3f, x5, x6
b 0b
/*
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 6cdea7d56059..491179922f49 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -129,7 +129,7 @@ CPU_LE( rev32 v19.16b, v19.16b )
/* handled all input blocks? */
cbz w2, 2f
- cond_yield 3f, x5
+ cond_yield 3f, x5, x6
b 0b
/*
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 6f5208414fe3..9c77313f5a60 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -184,11 +184,11 @@ SYM_FUNC_START(sha3_ce_transform)
eor v0.16b, v0.16b, v31.16b
cbnz w8, 3b
- cond_yield 3f, x8
+ cond_yield 4f, x8, x9
cbnz w2, 0b
/* save state */
-3: st1 { v0.1d- v3.1d}, [x0], #32
+4: st1 { v0.1d- v3.1d}, [x0], #32
st1 { v4.1d- v7.1d}, [x0], #32
st1 { v8.1d-v11.1d}, [x0], #32
st1 {v12.1d-v15.1d}, [x0], #32
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index d6e7f6c95fa6..b6a3a36e15f5 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -195,7 +195,7 @@ CPU_LE( rev64 v19.16b, v19.16b )
add v10.2d, v10.2d, v2.2d
add v11.2d, v11.2d, v3.2d
- cond_yield 3f, x4
+ cond_yield 3f, x4, x5
/* handled all input blocks? */
cbnz w2, 0b
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 880b9054d75c..934b9be582d2 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -173,7 +173,7 @@ static inline void gic_pmr_mask_irqs(void)
static inline void gic_arch_enable_irqs(void)
{
- asm volatile ("msr daifclr, #2" : : : "memory");
+ asm volatile ("msr daifclr, #3" : : : "memory");
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 9f0ec21d6327..88d20f04c64a 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -165,25 +165,6 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
isb();
}
-/*
- * Ensure that reads of the counter are treated the same as memory reads
- * for the purposes of ordering by subsequent memory barriers.
- *
- * This insanity brought to you by speculative system register reads,
- * out-of-order memory accesses, sequence locks and Thomas Gleixner.
- *
- * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
- */
-#define arch_counter_enforce_ordering(val) do { \
- u64 tmp, _val = (val); \
- \
- asm volatile( \
- " eor %0, %1, %1\n" \
- " add %0, sp, %0\n" \
- " ldr xzr, [%0]" \
- : "=r" (tmp) : "r" (_val)); \
-} while (0)
-
static __always_inline u64 __arch_counter_get_cntpct_stable(void)
{
u64 cnt;
@@ -224,8 +205,6 @@ static __always_inline u64 __arch_counter_get_cntvct(void)
return cnt;
}
-#undef arch_counter_enforce_ordering
-
static inline int arch_timer_arch_init(void)
{
return 0;
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index 52dead2a8640..8ca2dc0661ee 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -13,30 +13,12 @@
* so use the base value of ldp as thread.keys_user and offset as
* thread.keys_user.ap*.
*/
- .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+ .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
mov \tmp1, #THREAD_KEYS_USER
add \tmp1, \tsk, \tmp1
-alternative_if_not ARM64_HAS_ADDRESS_AUTH
- b .Laddr_auth_skip_\@
-alternative_else_nop_endif
ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
msr_s SYS_APIAKEYLO_EL1, \tmp2
msr_s SYS_APIAKEYHI_EL1, \tmp3
- ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIB]
- msr_s SYS_APIBKEYLO_EL1, \tmp2
- msr_s SYS_APIBKEYHI_EL1, \tmp3
- ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDA]
- msr_s SYS_APDAKEYLO_EL1, \tmp2
- msr_s SYS_APDAKEYHI_EL1, \tmp3
- ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDB]
- msr_s SYS_APDBKEYLO_EL1, \tmp2
- msr_s SYS_APDBKEYHI_EL1, \tmp3
-.Laddr_auth_skip_\@:
-alternative_if ARM64_HAS_GENERIC_AUTH
- ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APGA]
- msr_s SYS_APGAKEYLO_EL1, \tmp2
- msr_s SYS_APGAKEYHI_EL1, \tmp3
-alternative_else_nop_endif
.endm
.macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ca31594d3d6c..ab569b0b45fc 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -15,6 +15,7 @@
#include <asm-generic/export.h>
#include <asm/asm-offsets.h>
+#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
@@ -23,6 +24,14 @@
#include <asm/ptrace.h>
#include <asm/thread_info.h>
+ /*
+ * Provide a wxN alias for each wN register so that we can paste an xN
+ * reference after a 'w' to obtain the 32-bit version.
+ */
+ .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+ wx\n .req w\n
+ .endr
+
.macro save_and_disable_daif, flags
mrs \flags, daif
msr daifset, #0xf
@@ -40,9 +49,9 @@
msr daif, \flags
.endm
- /* IRQ is the lowest priority flag, unconditionally unmask the rest. */
- .macro enable_da_f
- msr daifclr, #(8 | 4 | 1)
+ /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */
+ .macro enable_da
+ msr daifclr, #(8 | 4)
.endm
/*
@@ -50,7 +59,7 @@
*/
.macro save_and_disable_irq, flags
mrs \flags, daif
- msr daifset, #2
+ msr daifset, #3
.endm
.macro restore_irq, flags
@@ -692,90 +701,33 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
isb
.endm
-/*
- * Check whether to yield to another runnable task from kernel mode NEON code
- * (which runs with preemption disabled).
- *
- * if_will_cond_yield_neon
- * // pre-yield patchup code
- * do_cond_yield_neon
- * // post-yield patchup code
- * endif_yield_neon <label>
- *
- * where <label> is optional, and marks the point where execution will resume
- * after a yield has been performed. If omitted, execution resumes right after
- * the endif_yield_neon invocation. Note that the entire sequence, including
- * the provided patchup code, will be omitted from the image if
- * CONFIG_PREEMPTION is not defined.
- *
- * As a convenience, in the case where no patchup code is required, the above
- * sequence may be abbreviated to
- *
- * cond_yield_neon <label>
- *
- * Note that the patchup code does not support assembler directives that change
- * the output section, any use of such directives is undefined.
- *
- * The yield itself consists of the following:
- * - Check whether the preempt count is exactly 1 and a reschedule is also
- * needed. If so, calling of preempt_enable() in kernel_neon_end() will
- * trigger a reschedule. If it is not the case, yielding is pointless.
- * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
- * code.
- *
- * This macro sequence may clobber all CPU state that is not guaranteed by the
- * AAPCS to be preserved across an ordinary function call.
- */
-
- .macro cond_yield_neon, lbl
- if_will_cond_yield_neon
- do_cond_yield_neon
- endif_yield_neon \lbl
- .endm
-
- .macro if_will_cond_yield_neon
-#ifdef CONFIG_PREEMPTION
- get_current_task x0
- ldr x0, [x0, #TSK_TI_PREEMPT]
- sub x0, x0, #PREEMPT_DISABLE_OFFSET
- cbz x0, .Lyield_\@
- /* fall through to endif_yield_neon */
- .subsection 1
-.Lyield_\@ :
-#else
- .section ".discard.cond_yield_neon", "ax"
-#endif
- .endm
-
- .macro do_cond_yield_neon
- bl kernel_neon_end
- bl kernel_neon_begin
- .endm
-
- .macro endif_yield_neon, lbl
- .ifnb \lbl
- b \lbl
- .else
- b .Lyield_out_\@
- .endif
- .previous
-.Lyield_out_\@ :
- .endm
-
/*
- * Check whether preempt-disabled code should yield as soon as it
- * is able. This is the case if re-enabling preemption a single
- * time results in a preempt count of zero, and the TIF_NEED_RESCHED
- * flag is set. (Note that the latter is stored negated in the
- * top word of the thread_info::preempt_count field)
+ * Check whether preempt/bh-disabled asm code should yield as soon as
+ * it is able. This is the case if we are currently running in task
+ * context, and either a softirq is pending, or the TIF_NEED_RESCHED
+ * flag is set and re-enabling preemption a single time would result in
+ * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
+ * stored negated in the top word of the thread_info::preempt_count
+ * field)
*/
- .macro cond_yield, lbl:req, tmp:req
-#ifdef CONFIG_PREEMPTION
+ .macro cond_yield, lbl:req, tmp:req, tmp2:req
get_current_task \tmp
ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
+ /*
+ * If we are serving a softirq, there is no point in yielding: the
+ * softirq will not be preempted no matter what we do, so we should
+ * run to completion as quickly as we can.
+ */
+ tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
+#ifdef CONFIG_PREEMPTION
sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET
cbz \tmp, \lbl
#endif
+ adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
+ this_cpu_offset \tmp2
+ ldr w\tmp, [\tmp, \tmp2]
+ cbnz w\tmp, \lbl // yield on pending softirq in task context
+.Lnoyield_\@:
.endm
/*
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index c3009b0e5239..065ba482daf0 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,10 +25,6 @@
#define psb_csync() asm volatile("hint #17" : : : "memory")
#define csdb() asm volatile("hint #20" : : : "memory")
-#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \
- SB_BARRIER_INSN"nop\n", \
- ARM64_HAS_SB))
-
#ifdef CONFIG_ARM64_PSEUDO_NMI
#define pmr_sync() \
do { \
@@ -70,6 +66,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
return mask;
}
+/*
+ * Ensure that reads of the counter are treated the same as memory reads
+ * for the purposes of ordering by subsequent memory barriers.
+ *
+ * This insanity brought to you by speculative system register reads,
+ * out-of-order memory accesses, sequence locks and Thomas Gleixner.
+ *
+ * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
+ */
+#define arch_counter_enforce_ordering(val) do { \
+ u64 tmp, _val = (val); \
+ \
+ asm volatile( \
+ " eor %0, %1, %1\n" \
+ " add %0, sp, %0\n" \
+ " ldr xzr, [%0]" \
+ : "=r" (tmp) : "r" (_val)); \
+} while (0)
+
#define __smp_mb() dmb(ish)
#define __smp_rmb() dmb(ishld)
#define __smp_wmb() dmb(ishst)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index c40f2490cd7b..b0c5eda0498f 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -67,7 +67,8 @@
#define ARM64_HAS_LDAPR 59
#define ARM64_KVM_PROTECTED_MODE 60
#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61
+#define ARM64_HAS_EPAN 62
-#define ARM64_NCAPS 62
+#define ARM64_NCAPS 63
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 61177bac49fa..338840c00e8e 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -63,6 +63,23 @@ struct arm64_ftr_bits {
s64 safe_val; /* safe value for FTR_EXACT features */
};
+/*
+ * Describe the early feature override to the core override code:
+ *
+ * @val Values that are to be merged into the final
+ * sanitised value of the register. Only the bitfields
+ * set to 1 in @mask are valid
+ * @mask Mask of the features that are overridden by @val
+ *
+ * A @mask field set to full-1 indicates that the corresponding field
+ * in @val is a valid override.
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-0 denotes that this field has no override
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-1 denotes that this field has an invalid override.
+ */
struct arm64_ftr_override {
u64 val;
u64 mask;
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 1c26d7baa67f..5eb7af9c4557 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -13,8 +13,8 @@
#include <asm/ptrace.h>
#define DAIF_PROCCTX 0
-#define DAIF_PROCCTX_NOIRQ PSR_I_BIT
-#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT)
+#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT)
+#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
#define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
@@ -47,7 +47,7 @@ static inline unsigned long local_daif_save_flags(void)
if (system_uses_irq_prio_masking()) {
/* If IRQs are masked with PMR, reflect it in the flags */
if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
- flags |= PSR_I_BIT;
+ flags |= PSR_I_BIT | PSR_F_BIT;
}
return flags;
@@ -69,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags)
bool irq_disabled = flags & PSR_I_BIT;
WARN_ON(system_has_prio_mask_debugging() &&
- !(read_sysreg(daif) & PSR_I_BIT));
+ (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT));
if (!irq_disabled) {
trace_hardirqs_on();
@@ -86,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags)
* If interrupts are disabled but we can take
* asynchronous errors, we can take NMIs
*/
- flags &= ~PSR_I_BIT;
+ flags &= ~(PSR_I_BIT | PSR_F_BIT);
pmr = GIC_PRIO_IRQOFF;
} else {
pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index d77d358f9395..b3f2d3bb0938 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -131,6 +131,26 @@
.Lskip_sve_\@:
.endm
+/* Disable any fine grained traps */
+.macro __init_el2_fgt
+ mrs x1, id_aa64mmfr0_el1
+ ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4
+ cbz x1, .Lskip_fgt_\@
+
+ msr_s SYS_HDFGRTR_EL2, xzr
+ msr_s SYS_HDFGWTR_EL2, xzr
+ msr_s SYS_HFGRTR_EL2, xzr
+ msr_s SYS_HFGWTR_EL2, xzr
+ msr_s SYS_HFGITR_EL2, xzr
+
+ mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
+ ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4
+ cbz x1, .Lskip_fgt_\@
+
+ msr_s SYS_HAFGRTR_EL2, xzr
+.Lskip_fgt_\@:
+.endm
+
.macro __init_el2_nvhe_prepare_eret
mov x0, #INIT_PSTATE_EL1
msr spsr_el2, x0
@@ -155,6 +175,7 @@
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
__init_el2_nvhe_sve
+ __init_el2_fgt
__init_el2_nvhe_prepare_eret
.endm
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index bec5f14b622a..ebb263b2d3b1 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -73,6 +73,7 @@ extern void sve_flush_live(void);
extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
+extern void sve_set_vq(unsigned long vq_minus_1);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index b2b0c6405eb0..fac08e18bcd5 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -8,6 +8,10 @@
struct pt_regs;
+int set_handle_irq(void (*handle_irq)(struct pt_regs *));
+#define set_handle_irq set_handle_irq
+int set_handle_fiq(void (*handle_fiq)(struct pt_regs *));
+
static inline int nr_legacy_irqs(void)
{
return 0;
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
index a1020285ea75..81bbfa3a035b 100644
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@@ -2,6 +2,8 @@
#ifndef __ASM_IRQ_WORK_H
#define __ASM_IRQ_WORK_H
+extern void arch_irq_work_raise(void);
+
static inline bool arch_irq_work_has_interrupt(void)
{
return true;
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index ff328e5bbb75..b57b9b1e4344 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -12,15 +12,13 @@
/*
* Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
- * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai'
+ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif'
* order:
* Masking debug exceptions causes all other exceptions to be masked too/
- * Masking SError masks irq, but not debug exceptions. Masking irqs has no
- * side effects for other flags. Keeping to this order makes it easier for
- * entry.S to know which exceptions should be unmasked.
- *
- * FIQ is never expected, but we mask it when we disable debug exceptions, and
- * unmask it at all other times.
+ * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are
+ * always masked and unmasked together, and have no side effects for other
+ * flags. Keeping to this order makes it easier for entry.S to know which
+ * exceptions should be unmasked.
*/
/*
@@ -35,7 +33,7 @@ static inline void arch_local_irq_enable(void)
}
asm volatile(ALTERNATIVE(
- "msr daifclr, #2 // arch_local_irq_enable",
+ "msr daifclr, #3 // arch_local_irq_enable",
__msr_s(SYS_ICC_PMR_EL1, "%0"),
ARM64_HAS_IRQ_PRIO_MASKING)
:
@@ -54,7 +52,7 @@ static inline void arch_local_irq_disable(void)
}
asm volatile(ALTERNATIVE(
- "msr daifset, #2 // arch_local_irq_disable",
+ "msr daifset, #3 // arch_local_irq_disable",
__msr_s(SYS_ICC_PMR_EL1, "%0"),
ARM64_HAS_IRQ_PRIO_MASKING)
:
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..b943879c1c24 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -243,8 +243,10 @@ static inline const void *__tag_set(const void *addr, u8 tag)
}
#ifdef CONFIG_KASAN_HW_TAGS
-#define arch_enable_tagging() mte_enable_kernel()
+#define arch_enable_tagging_sync() mte_enable_kernel_sync()
+#define arch_enable_tagging_async() mte_enable_kernel_async()
#define arch_set_tagging_report_once(state) mte_set_report_once(state)
+#define arch_force_async_tag_fault() mte_check_tfsr_exit()
#define arch_init_tags(max_tag) mte_init_tags(max_tag)
#define arch_get_random_tag() mte_get_random_tag()
#define arch_get_mem_tag(addr) mte_get_mem_tag(addr)
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 7ab500e2ad17..4acf8bf41cad 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -77,7 +77,8 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
} while (curr != end);
}
-void mte_enable_kernel(void);
+void mte_enable_kernel_sync(void);
+void mte_enable_kernel_async(void);
void mte_init_tags(u64 max_tag);
void mte_set_report_once(bool state);
@@ -104,7 +105,11 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
{
}
-static inline void mte_enable_kernel(void)
+static inline void mte_enable_kernel_sync(void)
+{
+}
+
+static inline void mte_enable_kernel_async(void)
{
}
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 9b557a457f24..bc88a1ced0d7 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -39,16 +39,15 @@ void mte_free_tag_storage(char *storage);
void mte_sync_tags(pte_t *ptep, pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
-void flush_mte_state(void);
+void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
+void mte_suspend_enter(void);
void mte_suspend_exit(void);
long set_mte_ctrl(struct task_struct *task, unsigned long arg);
long get_mte_ctrl(struct task_struct *task);
int mte_ptrace_copy_tags(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
-void mte_assign_mem_tag_range(void *addr, size_t size);
-
#else /* CONFIG_ARM64_MTE */
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
@@ -60,12 +59,15 @@ static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
{
}
-static inline void flush_mte_state(void)
+static inline void mte_thread_init_user(void)
{
}
static inline void mte_thread_switch(struct task_struct *next)
{
}
+static inline void mte_suspend_enter(void)
+{
+}
static inline void mte_suspend_exit(void)
{
}
@@ -84,11 +86,51 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
return -EIO;
}
-static inline void mte_assign_mem_tag_range(void *addr, size_t size)
+#endif /* CONFIG_ARM64_MTE */
+
+#ifdef CONFIG_KASAN_HW_TAGS
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_mode);
+
+static inline bool system_uses_mte_async_mode(void)
{
+ return static_branch_unlikely(&mte_async_mode);
}
-#endif /* CONFIG_ARM64_MTE */
+void mte_check_tfsr_el1(void);
+
+static inline void mte_check_tfsr_entry(void)
+{
+ mte_check_tfsr_el1();
+}
+
+static inline void mte_check_tfsr_exit(void)
+{
+ /*
+ * The asynchronous faults are sync'ed automatically with
+ * TFSR_EL1 on kernel entry but for exit an explicit dsb()
+ * is required.
+ */
+ dsb(nsh);
+ isb();
+
+ mte_check_tfsr_el1();
+}
+#else
+static inline bool system_uses_mte_async_mode(void)
+{
+ return false;
+}
+static inline void mte_check_tfsr_el1(void)
+{
+}
+static inline void mte_check_tfsr_entry(void)
+{
+}
+static inline void mte_check_tfsr_exit(void)
+{
+}
+#endif /* CONFIG_KASAN_HW_TAGS */
#endif /* __ASSEMBLY__ */
#endif /* __ASM_MTE_H */
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index cf3a0fd7c1a7..9aa193e0e8f2 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -3,23 +3,19 @@
#define _ASM_ARM64_PARAVIRT_H
#ifdef CONFIG_PARAVIRT
+#include <linux/static_call_types.h>
+
struct static_key;
extern struct static_key paravirt_steal_enabled;
extern struct static_key paravirt_steal_rq_enabled;
-struct pv_time_ops {
- unsigned long long (*steal_clock)(int cpu);
-};
-
-struct paravirt_patch_template {
- struct pv_time_ops time;
-};
+u64 dummy_steal_clock(int cpu);
-extern struct paravirt_patch_template pv_ops;
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
static inline u64 paravirt_steal_clock(int cpu)
{
- return pv_ops.time.steal_clock(cpu);
+ return static_call(pv_steal_clock)(cpu);
}
int __init pv_time_init(void);
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 3c6a7f5988b1..31fbab3d6f99 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -27,7 +27,10 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
- __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
+ pudval_t pudval = PUD_TYPE_TABLE;
+
+ pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;
+ __pud_populate(pudp, __pa(pmdp), pudval);
}
#else
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
@@ -45,7 +48,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
- __p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE);
+ p4dval_t p4dval = P4D_TYPE_TABLE;
+
+ p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
+ __p4d_populate(p4dp, __pa(pudp), p4dval);
}
#else
static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
@@ -70,16 +76,15 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
- /*
- * The pmd must be loaded with the physical address of the PTE table
- */
- __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE);
+ VM_BUG_ON(mm != &init_mm);
+ __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
}
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
{
- __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE);
+ VM_BUG_ON(mm == &init_mm);
+ __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN);
}
#define pmd_pgtable(pmd) pmd_page(pmd)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 42442a0ae2ab..b82575a33f8b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -94,6 +94,17 @@
/*
* Hardware page table definitions.
*
+ * Level 0 descriptor (P4D).
+ */
+#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
+#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1)
+#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0)
+#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0)
+#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */
+#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59)
+#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60)
+
+/*
* Level 1 descriptor (PUD).
*/
#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
@@ -101,6 +112,8 @@
#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */
+#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59)
+#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60)
/*
* Level 2 descriptor (PMD).
@@ -122,6 +135,8 @@
#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52)
#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54)
+#define PMD_TABLE_PXN (_AT(pmdval_t, 1) << 59)
+#define PMD_TABLE_UXN (_AT(pmdval_t, 1) << 60)
/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 9a65fb528110..fab2f573f7a4 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -87,12 +87,13 @@ extern bool arm64_use_ng_mappings;
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_READONLY
#define __P011 PAGE_READONLY
-#define __P100 PAGE_READONLY_EXEC
+#define __P100 PAGE_EXECONLY
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_READONLY_EXEC
#define __P111 PAGE_READONLY_EXEC
@@ -101,7 +102,7 @@ extern bool arm64_use_ng_mappings;
#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
+#define __S100 PAGE_EXECONLY
#define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 47027796c2f9..0b10204e72fc 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -113,11 +113,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
+/*
+ * Execute-only user mappings do not have the PTE_USER bit set. All valid
+ * kernel mappings have the PTE_UXN bit set.
+ */
#define pte_valid_not_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
-#define pte_valid_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
-
+ ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
/*
* Could the pte be present in the TLB? We must check mm_tlb_flush_pending
* so that we don't erroneously return false for pages that have been
@@ -130,12 +131,14 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
/*
- * p??_access_permitted() is true for valid user mappings (subject to the
- * write permission check). PROT_NONE mappings do not have the PTE_VALID bit
- * set.
+ * p??_access_permitted() is true for valid user mappings (PTE_USER
+ * bit set, subject to the write permission check). For execute-only
+ * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits
+ * not set), it must return false. PROT_NONE mappings do not have the
+ * PTE_VALID bit set.
*/
#define pte_access_permitted(pte, write) \
- (pte_valid_user(pte) && (!(write) || pte_write(pte)))
+ (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
#define pmd_access_permitted(pmd, write) \
(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
@@ -995,6 +998,18 @@ static inline bool arch_wants_old_prefaulted_pte(void)
}
#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte
+/*
+ * Replace an execute-only protection with a readable+executable one when
+ * the CPU lacks EPAN.
+ *
+ * If the ARM64_HAS_EPAN capability is present, @prot is returned unchanged.
+ * Otherwise a @prot equal to PAGE_EXECONLY is turned into
+ * PAGE_READONLY_EXEC, and every other value is returned unchanged.
+ */
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+ if (cpus_have_const_cap(ARM64_HAS_EPAN))
+ return prot;
+
+ if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
+ return prot;
+
+ return PAGE_READONLY_EXEC;
+}
+
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index b112a11e9302..d50416be99be 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -3,6 +3,7 @@
#define __ASM_POINTER_AUTH_H
#include <linux/bitops.h>
+#include <linux/prctl.h>
#include <linux/random.h>
#include <asm/cpufeature.h>
@@ -34,6 +35,25 @@ struct ptrauth_keys_kernel {
struct ptrauth_key apia;
};
+/*
+ * Write both halves of pointer authentication key @v to the system
+ * registers SYS_<k>KEYLO_EL1 and SYS_<k>KEYHI_EL1.
+ *
+ * @v is copied into a local first, so the argument expression is evaluated
+ * only once. The macro itself issues no barrier after the register writes.
+ */
+#define __ptrauth_key_install_nosync(k, v) \
+do { \
+ struct ptrauth_key __pki_v = (v); \
+ write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \
+ write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \
+} while (0)
+
+/*
+ * Install the user pointer authentication keys held in @keys.
+ *
+ * The APIB, APDA and APDB keys are written when address authentication is
+ * supported, and the APGA key when generic authentication is supported.
+ * The APIA key is not written by this function.
+ */
+static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys)
+{
+ if (system_supports_address_auth()) {
+ __ptrauth_key_install_nosync(APIB, keys->apib);
+ __ptrauth_key_install_nosync(APDA, keys->apda);
+ __ptrauth_key_install_nosync(APDB, keys->apdb);
+ }
+
+ if (system_supports_generic_auth())
+ __ptrauth_key_install_nosync(APGA, keys->apga);
+}
+
static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
{
if (system_supports_address_auth()) {
@@ -45,14 +65,9 @@ static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
if (system_supports_generic_auth())
get_random_bytes(&keys->apga, sizeof(keys->apga));
-}
-#define __ptrauth_key_install_nosync(k, v) \
-do { \
- struct ptrauth_key __pki_v = (v); \
- write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \
- write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \
-} while (0)
+ ptrauth_keys_install_user(keys);
+}
static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
{
@@ -71,6 +86,10 @@ static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kerne
extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
+extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+ unsigned long enabled);
+extern int ptrauth_get_enabled_keys(struct task_struct *tsk);
+
static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
{
return ptrauth_clear_pac(ptr);
@@ -85,8 +104,23 @@ static __always_inline void ptrauth_enable(void)
isb();
}
-#define ptrauth_thread_init_user(tsk) \
- ptrauth_keys_init_user(&(tsk)->thread.keys_user)
+/*
+ * Reinstall the current task's user keys with ptrauth_keys_install_user().
+ * NOTE(review): the name suggests this runs on the resume-from-suspend
+ * path; the call site is not visible here.
+ */
+#define ptrauth_suspend_exit() \
+ ptrauth_keys_install_user(&current->thread.keys_user)
+
+/*
+ * Initialise the current task's user keys with ptrauth_keys_init_user().
+ * When address authentication is supported, also call set_task_sctlr_el1()
+ * with thread.sctlr_user ORed with all four address-key enable bits
+ * (ENIA, ENIB, ENDA, ENDB).
+ */
+#define ptrauth_thread_init_user() \
+ do { \
+ ptrauth_keys_init_user(&current->thread.keys_user); \
+ \
+ /* enable all keys */ \
+ if (system_supports_address_auth()) \
+ set_task_sctlr_el1(current->thread.sctlr_user | \
+ SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
+ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB); \
+ } while (0)
+
+/* Install @tsk's user keys with ptrauth_keys_install_user(). */
+#define ptrauth_thread_switch_user(tsk) \
+ ptrauth_keys_install_user(&(tsk)->thread.keys_user)
+
#define ptrauth_thread_init_kernel(tsk) \
ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
#define ptrauth_thread_switch_kernel(tsk) \
@@ -95,10 +129,17 @@ static __always_inline void ptrauth_enable(void)
#else /* CONFIG_ARM64_PTR_AUTH */
#define ptrauth_enable()
#define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL)
+#define ptrauth_set_enabled_keys(tsk, keys, enabled) (-EINVAL)
+#define ptrauth_get_enabled_keys(tsk) (-EINVAL)
#define ptrauth_strip_insn_pac(lr) (lr)
-#define ptrauth_thread_init_user(tsk)
+#define ptrauth_suspend_exit()
+#define ptrauth_thread_init_user()
#define ptrauth_thread_init_kernel(tsk)
+#define ptrauth_thread_switch_user(tsk)
#define ptrauth_thread_switch_kernel(tsk)
#endif /* CONFIG_ARM64_PTR_AUTH */
+#define PR_PAC_ENABLED_KEYS_MASK \
+ (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+
#endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index efc10e9041a0..9df3feeee890 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -151,11 +151,15 @@ struct thread_struct {
struct ptrauth_keys_kernel keys_kernel;
#endif
#ifdef CONFIG_ARM64_MTE
- u64 sctlr_tcf0;
u64 gcr_user_excl;
#endif
+ u64 sctlr_user;
};
+#define SCTLR_USER_MASK \
+ (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \
+ SCTLR_EL1_TCF0_MASK)
+
static inline void arch_thread_struct_whitelist(unsigned long *offset,
unsigned long *size)
{
@@ -247,6 +251,8 @@ extern void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
+void set_task_sctlr_el1(u64 sctlr);
+
/* Thread switching */
extern struct task_struct *cpu_switch_to(struct task_struct *prev,
struct task_struct *next);
@@ -303,6 +309,11 @@ extern void __init minsigstksz_setup(void);
/* PR_PAC_RESET_KEYS prctl */
#define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg)
+/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */
+#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled) \
+ ptrauth_set_enabled_keys(tsk, keys, enabled)
+#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk)
+
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 38187f74e089..b1dd7ecff7ef 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -23,7 +23,7 @@ struct ptdump_info {
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
#ifdef CONFIG_PTDUMP_DEBUGFS
-void ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index bcb01ca15325..0e357757c0cc 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -145,6 +145,7 @@ bool cpus_are_stuck_in_kernel(void);
extern void crash_smp_send_stop(void);
extern bool smp_crash_stop_failed(void);
+extern void panic_smp_self_stop(void);
#endif /* ifndef __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index eb29b1fe8255..4b33ca620679 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -148,27 +148,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
return false;
}
-static inline void start_backtrace(struct stackframe *frame,
- unsigned long fp, unsigned long pc)
-{
- frame->fp = fp;
- frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame->graph = 0;
-#endif
-
- /*
- * Prime the first unwind.
- *
- * In unwind_frame() we'll check that the FP points to a valid stack,
- * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
- * treated as a transition to whichever stack that happens to be. The
- * prev_fp value won't be used, but we set it to 0 such that it is
- * definitely not an accessible stack address.
- */
- bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
- frame->prev_fp = 0;
- frame->prev_type = STACK_TYPE_UNKNOWN;
-}
+void start_backtrace(struct stackframe *frame, unsigned long fp,
+ unsigned long pc);
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d4a5fca984c3..b31ac5ccc8ab 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -475,9 +475,15 @@
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
+#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4)
+#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5)
+#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6)
#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
+#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
+#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
+#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
@@ -565,8 +571,10 @@
#define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT)
#define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT)
+#define SCTLR_ELx_ENIA_SHIFT 31
+
#define SCTLR_ELx_ITFSB (BIT(37))
-#define SCTLR_ELx_ENIA (BIT(31))
+#define SCTLR_ELx_ENIA (BIT(SCTLR_ELx_ENIA_SHIFT))
#define SCTLR_ELx_ENIB (BIT(30))
#define SCTLR_ELx_ENDA (BIT(27))
#define SCTLR_ELx_EE (BIT(25))
@@ -597,6 +605,7 @@
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
/* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_EPAN (BIT(57))
#define SCTLR_EL1_ATA0 (BIT(42))
#define SCTLR_EL1_TCF0_SHIFT 38
@@ -637,7 +646,7 @@
SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \
- SCTLR_EL1_RES1)
+ SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
/* MAIR_ELx memory attributes (used by Linux) */
#define MAIR_ATTR_DEVICE_nGnRnE UL(0x00)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 0deb88467111..b5f08621fa29 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -20,6 +20,7 @@
#include <asm/cpufeature.h>
#include <asm/mmu.h>
+#include <asm/mte.h>
#include <asm/ptrace.h>
#include <asm/memory.h>
#include <asm/extable.h>
@@ -188,6 +189,23 @@ static inline void __uaccess_enable_tco(void)
ARM64_MTE, CONFIG_KASAN_HW_TAGS));
}
+/*
+ * Conditional wrappers around __uaccess_disable_tco() and
+ * __uaccess_enable_tco(): each one forwards to its unconditional
+ * counterpart only when system_uses_mte_async_mode() is true, and does
+ * nothing otherwise.
+ *
+ * Tag checking is disabled only in MTE async mode, since sync mode
+ * generates exceptions synchronously and the nofault accessors and
+ * load_unaligned_zeropad() can handle them.
+ */
+static inline void __uaccess_disable_tco_async(void)
+{
+ if (system_uses_mte_async_mode())
+ __uaccess_disable_tco();
+}
+
+static inline void __uaccess_enable_tco_async(void)
+{
+ if (system_uses_mte_async_mode())
+ __uaccess_enable_tco();
+}
+
static inline void uaccess_disable_privileged(void)
{
__uaccess_disable_tco();
@@ -307,8 +325,10 @@ do { \
do { \
int __gkn_err = 0; \
\
+ __uaccess_enable_tco_async(); \
__raw_get_mem("ldr", *((type *)(dst)), \
(__force type *)(src), __gkn_err); \
+ __uaccess_disable_tco_async(); \
if (unlikely(__gkn_err)) \
goto err_label; \
} while (0)
@@ -380,8 +400,10 @@ do { \
do { \
int __pkn_err = 0; \
\
+ __uaccess_enable_tco_async(); \
__raw_put_mem("str", *((type *)(src)), \
(__force type *)(dst), __pkn_err); \
+ __uaccess_disable_tco_async(); \
if (unlikely(__pkn_err)) \
goto err_label; \
} while(0)
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 631ab1281633..4b4c0dac0e14 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -83,11 +83,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
*/
isb();
asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
- /*
- * This isb() is required to prevent that the seq lock is
- * speculated.#
- */
- isb();
+ arch_counter_enforce_ordering(res);
return res;
}
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index ea487218db79..2dcb104c645b 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -55,6 +55,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
{
unsigned long ret, tmp;
+ __uaccess_enable_tco_async();
+
/* Load word from unaligned pointer addr */
asm(
"1: ldr %0, %3\n"
@@ -76,6 +78,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
: "=&r" (ret), "=&r" (tmp)
: "r" (addr), "Q" (*(unsigned long *)addr));
+ __uaccess_disable_tco_async();
+
return ret;
}
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index ed65576ce710..6cc97730790e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -9,6 +9,11 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_insn.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
+# Remove stack protector to avoid triggering unneeded stack canary
+# checks due to randomize_kstack_offset.
+CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
+CFLAGS_syscall.o += -fno-stack-protector
+
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-common.o entry-fpsimd.o process.o ptrace.o \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a36e2fc330d4..e797603e55b7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
#endif
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
+ DEFINE(THREAD_SCTLR_USER, offsetof(struct task_struct, thread.sctlr_user));
#ifdef CONFIG_ARM64_PTR_AUTH
DEFINE(THREAD_KEYS_USER, offsetof(struct task_struct, thread.keys_user));
DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel));
@@ -95,6 +96,8 @@ int main(void)
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
+ DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
+ DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
@@ -147,10 +150,6 @@ int main(void)
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
DEFINE(PTRAUTH_USER_KEY_APIA, offsetof(struct ptrauth_keys_user, apia));
- DEFINE(PTRAUTH_USER_KEY_APIB, offsetof(struct ptrauth_keys_user, apib));
- DEFINE(PTRAUTH_USER_KEY_APDA, offsetof(struct ptrauth_keys_user, apda));
- DEFINE(PTRAUTH_USER_KEY_APDB, offsetof(struct ptrauth_keys_user, apdb));
- DEFINE(PTRAUTH_USER_KEY_APGA, offsetof(struct ptrauth_keys_user, apga));
DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia));
BLANK();
#endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e5281e1c8f1d..76c60b3cda53 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -808,6 +808,12 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
reg->name,
ftrp->shift + ftrp->width - 1,
ftrp->shift, str, tmp);
+ } else if ((ftr_mask & reg->override->val) == ftr_mask) {
+ reg->override->val &= ~ftr_mask;
+ pr_warn("%s[%d:%d]: impossible override, ignored\n",
+ reg->name,
+ ftrp->shift + ftrp->width - 1,
+ ftrp->shift);
}
val = arm64_ftr_set_value(ftrp, val, ftr_new);
@@ -1619,7 +1625,6 @@ int get_cpu_with_amu_feat(void)
}
#endif
-#ifdef CONFIG_ARM64_VHE
static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
{
return is_kernel_in_hyp_mode();
@@ -1638,7 +1643,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN))
write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
}
-#endif
static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
{
@@ -1823,6 +1827,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_pan,
},
#endif /* CONFIG_ARM64_PAN */
+#ifdef CONFIG_ARM64_EPAN
+ {
+ .desc = "Enhanced Privileged Access Never",
+ .capability = ARM64_HAS_EPAN,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR1_EL1,
+ .field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = 3,
+ },
+#endif /* CONFIG_ARM64_EPAN */
#ifdef CONFIG_ARM64_LSE_ATOMICS
{
.desc = "LSE atomic instructions",
@@ -1841,7 +1857,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_no_hw_prefetch,
},
-#ifdef CONFIG_ARM64_VHE
{
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
@@ -1849,7 +1864,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = runs_at_el2,
.cpu_enable = cpu_copy_el2regs,
},
-#endif /* CONFIG_ARM64_VHE */
{
.desc = "32-bit EL0 Support",
.capability = ARM64_HAS_32BIT_EL0,
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 9d3588450473..a1ec351c36bd 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -37,6 +37,8 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
lockdep_hardirqs_off(CALLER_ADDR0);
rcu_irq_enter_check_tick();
trace_hardirqs_off_finish();
+
+ mte_check_tfsr_entry();
}
/*
@@ -47,6 +49,8 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
{
lockdep_assert_irqs_disabled();
+ mte_check_tfsr_exit();
+
if (interrupts_enabled(regs)) {
if (regs->exit_rcu) {
trace_hardirqs_on_prepare();
@@ -293,6 +297,8 @@ asmlinkage void noinstr enter_from_user_mode(void)
asmlinkage void noinstr exit_to_user_mode(void)
{
+ mte_check_tfsr_exit();
+
trace_hardirqs_on_prepare();
lockdep_hardirqs_on_prepare(CALLER_ADDR0);
user_enter_irqoff();
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 2ca395c25448..3ecec60d3295 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -48,6 +48,11 @@ SYM_FUNC_START(sve_get_vl)
ret
SYM_FUNC_END(sve_get_vl)
+/*
+ * C-callable wrapper around the sve_load_vq macro.
+ *
+ * x0 - passed to sve_load_vq as its first operand; the caller in fpsimd.c
+ *      supplies sve_vq_from_vl(current->thread.sve_vl) - 1.
+ *
+ * NOTE(review): x1 and x2 look like scratch registers for sve_load_vq;
+ * confirm against the macro definition.
+ */
+SYM_FUNC_START(sve_set_vq)
+ sve_load_vq x0, x1, x2
+ ret
+SYM_FUNC_END(sve_set_vq)
+
/*
* Load SVE state from FPSIMD state.
*
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6acfc5e6b5e0..4ac5455c0ead 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -249,7 +249,29 @@ alternative_else_nop_endif
check_mte_async_tcf x22, x23
apply_ssbd 1, x22, x23
- ptrauth_keys_install_kernel tsk, x20, x22, x23
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ /*
+ * Enable IA for in-kernel PAC if the task had it disabled. Although
+ * this could be implemented with an unconditional MRS which would avoid
+ * a load, this was measured to be slower on Cortex-A75 and Cortex-A76.
+ *
+ * Install the kernel IA key only if IA was enabled in the task. If IA
+ * was disabled on kernel exit then we would have left the kernel IA
+ * installed so there is no need to install it again.
+ */
+ ldr x0, [tsk, THREAD_SCTLR_USER]
+ tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f
+ __ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
+ b 2f
+1:
+ mrs x0, sctlr_el1
+ orr x0, x0, SCTLR_ELx_ENIA
+ msr sctlr_el1, x0
+2:
+ isb
+alternative_else_nop_endif
+#endif
mte_set_kernel_gcr x22, x23
@@ -353,8 +375,26 @@ alternative_else_nop_endif
3:
scs_save tsk, x0
- /* No kernel C function calls after this as user keys are set. */
- ptrauth_keys_install_user tsk, x0, x1, x2
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+ /*
+ * IA was enabled for in-kernel PAC. Disable it now if needed, or
+ * alternatively install the user's IA. All other per-task keys and
+ * SCTLR bits were updated on task switch.
+ *
+ * No kernel C function calls after this.
+ */
+ ldr x0, [tsk, THREAD_SCTLR_USER]
+ tbz x0, SCTLR_ELx_ENIA_SHIFT, 1f
+ __ptrauth_keys_install_user tsk, x0, x1, x2
+ b 2f
+1:
+ mrs x0, sctlr_el1
+ bic x0, x0, SCTLR_ELx_ENIA
+ msr sctlr_el1, x0
+2:
+alternative_else_nop_endif
+#endif
mte_set_user_gcr tsk, x0, x1
@@ -493,28 +533,14 @@ tsk .req x28 // current thread_info
/*
* Interrupt handling.
*/
- .macro irq_handler
- ldr_l x1, handle_arch_irq
+ .macro irq_handler, handler:req
+ ldr_l x1, \handler
mov x0, sp
irq_stack_entry
blr x1
irq_stack_exit
.endm
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- /*
- * Set res to 0 if irqs were unmasked in interrupted context.
- * Otherwise set res to non-0 value.
- */
- .macro test_irqs_unmasked res:req, pmr:req
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- sub \res, \pmr, #GIC_PRIO_IRQON
-alternative_else
- mov \res, xzr
-alternative_endif
- .endm
-#endif
-
.macro gic_prio_kentry_setup, tmp:req
#ifdef CONFIG_ARM64_PSEUDO_NMI
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
@@ -533,6 +559,47 @@ alternative_endif
#endif
.endm
+ /*
+ * Shared body of the EL1 IRQ and FIQ entry paths.
+ *
+ * \handler names the variable holding the root handler function
+ * pointer; it is loaded and called by the irq_handler macro. The call
+ * is bracketed by enter_el1_irq_or_nmi and exit_el1_irq_or_nmi, each
+ * given the current sp in x0. With CONFIG_PREEMPTION,
+ * arm64_preempt_schedule_irq is called afterwards unless x24 (the
+ * preempt count, ORed with DAIF when IRQ priority masking is in use)
+ * is non-zero.
+ */
+ .macro el1_interrupt_handler, handler:req
+ gic_prio_irq_setup pmr=x20, tmp=x1
+ enable_da
+
+ mov x0, sp
+ bl enter_el1_irq_or_nmi
+
+ irq_handler \handler
+
+#ifdef CONFIG_PREEMPTION
+ ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
+alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ /*
+ * DA were cleared at start of handling, and IF are cleared by
+ * the GIC irqchip driver using gic_arch_enable_irqs() for
+ * normal IRQs. If anything is set, it means we come back from
+ * an NMI instead of a normal IRQ, so skip preemption
+ */
+ mrs x0, daif
+ orr x24, x24, x0
+alternative_else_nop_endif
+ cbnz x24, 1f // preempt count != 0 || NMI return path
+ bl arm64_preempt_schedule_irq // irq en/disable is done inside
+1:
+#endif
+
+ mov x0, sp
+ bl exit_el1_irq_or_nmi
+ .endm
+
+ /*
+ * Shared body of the EL0 IRQ and FIQ entry paths.
+ *
+ * Calls do_el0_irq_bp_hardening when bit 55 of x22 is set, then
+ * invokes the root handler named by \handler through the irq_handler
+ * macro.
+ *
+ * NOTE(review): x22 presumably holds the interrupted PC saved by
+ * kernel_entry, so bit 55 would flag a kernel-half address - confirm.
+ */
+ .macro el0_interrupt_handler, handler:req
+ gic_prio_irq_setup pmr=x20, tmp=x0
+ user_exit_irqoff
+ enable_da
+
+ tbz x22, #55, 1f
+ bl do_el0_irq_bp_hardening
+1:
+ irq_handler \handler
+ .endm
+
.text
/*
@@ -549,18 +616,18 @@ SYM_CODE_START(vectors)
kernel_ventry 1, sync // Synchronous EL1h
kernel_ventry 1, irq // IRQ EL1h
- kernel_ventry 1, fiq_invalid // FIQ EL1h
+ kernel_ventry 1, fiq // FIQ EL1h
kernel_ventry 1, error // Error EL1h
kernel_ventry 0, sync // Synchronous 64-bit EL0
kernel_ventry 0, irq // IRQ 64-bit EL0
- kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
+ kernel_ventry 0, fiq // FIQ 64-bit EL0
kernel_ventry 0, error // Error 64-bit EL0
#ifdef CONFIG_COMPAT
kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
- kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
+ kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
#else
kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
@@ -626,12 +693,6 @@ SYM_CODE_START_LOCAL(el0_error_invalid)
inv_entry 0, BAD_ERROR
SYM_CODE_END(el0_error_invalid)
-#ifdef CONFIG_COMPAT
-SYM_CODE_START_LOCAL(el0_fiq_invalid_compat)
- inv_entry 0, BAD_FIQ, 32
-SYM_CODE_END(el0_fiq_invalid_compat)
-#endif
-
SYM_CODE_START_LOCAL(el1_sync_invalid)
inv_entry 1, BAD_SYNC
SYM_CODE_END(el1_sync_invalid)
@@ -662,35 +723,16 @@ SYM_CODE_END(el1_sync)
.align 6
SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
kernel_entry 1
- gic_prio_irq_setup pmr=x20, tmp=x1
- enable_da_f
-
- mov x0, sp
- bl enter_el1_irq_or_nmi
-
- irq_handler
-
-#ifdef CONFIG_PREEMPTION
- ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- /*
- * DA_F were cleared at start of handling. If anything is set in DAIF,
- * we come back from an NMI, so skip preemption
- */
- mrs x0, daif
- orr x24, x24, x0
-alternative_else_nop_endif
- cbnz x24, 1f // preempt count != 0 || NMI return path
- bl arm64_preempt_schedule_irq // irq en/disable is done inside
-1:
-#endif
-
- mov x0, sp
- bl exit_el1_irq_or_nmi
-
+ el1_interrupt_handler handle_arch_irq
kernel_exit 1
SYM_CODE_END(el1_irq)
+/*
+ * FIQ taken from EL1: same sequence as el1_irq, but dispatching through
+ * handle_arch_fiq instead of handle_arch_irq.
+ */
+SYM_CODE_START_LOCAL_NOALIGN(el1_fiq)
+ kernel_entry 1
+ el1_interrupt_handler handle_arch_fiq
+ kernel_exit 1
+SYM_CODE_END(el1_fiq)
+
/*
* EL0 mode handlers.
*/
@@ -717,6 +759,11 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
b el0_irq_naked
SYM_CODE_END(el0_irq_compat)
+/*
+ * FIQ taken from 32-bit EL0: perform the 32-bit kernel_entry, then join
+ * the 64-bit path at el0_fiq_naked.
+ */
+SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat)
+ kernel_entry 0, 32
+ b el0_fiq_naked
+SYM_CODE_END(el0_fiq_compat)
+
SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
kernel_entry 0, 32
b el0_error_naked
@@ -727,18 +774,17 @@ SYM_CODE_END(el0_error_compat)
SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
kernel_entry 0
el0_irq_naked:
- gic_prio_irq_setup pmr=x20, tmp=x0
- user_exit_irqoff
- enable_da_f
-
- tbz x22, #55, 1f
- bl do_el0_irq_bp_hardening
-1:
- irq_handler
-
+ el0_interrupt_handler handle_arch_irq
b ret_to_user
SYM_CODE_END(el0_irq)
+/*
+ * FIQ taken from 64-bit EL0: run the common EL0 interrupt path with
+ * handle_arch_fiq, then return through ret_to_user. el0_fiq_naked is the
+ * entry point used by el0_fiq_compat after its own kernel_entry.
+ */
+SYM_CODE_START_LOCAL_NOALIGN(el0_fiq)
+ kernel_entry 0
+el0_fiq_naked:
+ el0_interrupt_handler handle_arch_fiq
+ b ret_to_user
+SYM_CODE_END(el0_fiq)
+
SYM_CODE_START_LOCAL(el1_error)
kernel_entry 1
mrs x1, esr_el1
@@ -759,7 +805,7 @@ el0_error_naked:
mov x0, sp
mov x1, x25
bl do_serror
- enable_da_f
+ enable_da
b ret_to_user
SYM_CODE_END(el0_error)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 062b21f30f94..ad3dd34a83cf 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -180,7 +180,7 @@ static void __get_cpu_fpsimd_context(void)
*/
static void get_cpu_fpsimd_context(void)
{
- preempt_disable();
+ local_bh_disable();
__get_cpu_fpsimd_context();
}
@@ -201,7 +201,7 @@ static void __put_cpu_fpsimd_context(void)
static void put_cpu_fpsimd_context(void)
{
__put_cpu_fpsimd_context();
- preempt_enable();
+ local_bh_enable();
}
static bool have_cpu_fpsimd_context(void)
@@ -285,7 +285,7 @@ static void task_fpsimd_load(void)
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
- if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE))
sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
sve_vq_from_vl(current->thread.sve_vl) - 1);
@@ -307,7 +307,8 @@ static void fpsimd_save(void)
WARN_ON(!have_cpu_fpsimd_context());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
- if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ test_thread_flag(TIF_SVE)) {
if (WARN_ON(sve_get_vl() != last->sve_vl)) {
/*
* Can't save the user regs, so current would
@@ -926,9 +927,8 @@ void fpsimd_release_task(struct task_struct *dead_task)
* Trapped SVE access
*
* Storage is allocated for the full SVE state, the current FPSIMD
- * register contents are migrated across, and TIF_SVE is set so that
- * the SVE access trap will be disabled the next time this task
- * reaches ret_to_user.
+ * register contents are migrated across, and the access trap is
+ * disabled.
*
* TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
* would have disabled the SVE access trap for userspace during
@@ -946,15 +946,24 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
get_cpu_fpsimd_context();
- fpsimd_save();
-
- /* Force ret_to_user to reload the registers: */
- fpsimd_flush_task_state(current);
-
- fpsimd_to_sve(current);
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */
+ /*
+ * Convert the FPSIMD state to SVE, zeroing all the state that
+ * is not shared with FPSIMD. If (as is likely) the current
+ * state is live in the registers then do this there and
+ * update our metadata for the current task including
+ * disabling the trap, otherwise update our in-memory copy.
+ */
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
+ sve_flush_live();
+ fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_to_sve(current);
+ }
+
put_cpu_fpsimd_context();
}
@@ -1092,7 +1101,7 @@ void fpsimd_preserve_current_state(void)
void fpsimd_signal_preserve_current_state(void)
{
fpsimd_preserve_current_state();
- if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ if (test_thread_flag(TIF_SVE))
sve_to_fpsimd(current);
}
@@ -1181,7 +1190,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
get_cpu_fpsimd_context();
current->thread.uw.fpsimd_state = *state;
- if (system_supports_sve() && test_thread_flag(TIF_SVE))
+ if (test_thread_flag(TIF_SVE))
fpsimd_to_sve(current);
task_fpsimd_load();
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 840bda1869e9..96873dfa67fd 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr)
* booted in EL1 or EL2 respectively.
*/
SYM_FUNC_START(init_kernel_el)
- mov_q x0, INIT_SCTLR_EL1_MMU_OFF
- msr sctlr_el1, x0
-
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq init_el2
SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr sctlr_el1, x0
isb
mov_q x0, INIT_PSTATE_EL1
msr spsr_el1, x0
@@ -504,9 +503,43 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
msr vbar_el2, x0
isb
+ /*
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
+ * making it impossible to start in nVHE mode. Is that
+ * compliant with the architecture? Absolutely not!
+ */
+ mrs x0, hcr_el2
+ and x0, x0, #HCR_E2H
+ cbz x0, 1f
+
+ /* Switching to VHE requires a sane SCTLR_EL1 as a start */
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr_s SYS_SCTLR_EL12, x0
+
+ /*
+ * Force an eret into a helper "function", and let it return
+ * to our original caller... This makes sure that we have
+ * initialised the basic PSTATE.
+ */
+ mov x0, #INIT_PSTATE_EL2
+ msr spsr_el1, x0
+ adr x0, __cpu_stick_to_vhe
+ msr elr_el1, x0
+ eret
+
+1:
+ mov_q x0, INIT_SCTLR_EL1_MMU_OFF
+ msr sctlr_el1, x0
+
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
eret
+
+__cpu_stick_to_vhe:
+ mov x0, #HVC_VHE_RESTART
+ hvc #0
+ mov x0, #BOOT_CPU_MODE_EL2
+ ret
SYM_FUNC_END(init_kernel_el)
/*
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 5eccbd62fec8..74ad3db061d1 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors)
ventry el2_fiq_invalid // FIQ EL2t
ventry el2_error_invalid // Error EL2t
- ventry el2_sync_invalid // Synchronous EL2h
+ ventry elx_sync // Synchronous EL2h
ventry el2_irq_invalid // IRQ EL2h
ventry el2_fiq_invalid // FIQ EL2h
ventry el2_error_invalid // Error EL2h
- ventry el1_sync // Synchronous 64-bit EL1
+ ventry elx_sync // Synchronous 64-bit EL1
ventry el1_irq_invalid // IRQ 64-bit EL1
ventry el1_fiq_invalid // FIQ 64-bit EL1
ventry el1_error_invalid // Error 64-bit EL1
@@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors)
.align 11
-SYM_CODE_START_LOCAL(el1_sync)
+SYM_CODE_START_LOCAL(elx_sync)
cmp x0, #HVC_SET_VECTORS
b.ne 1f
msr vbar_el2, x1
@@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync)
9: mov x0, xzr
eret
-SYM_CODE_END(el1_sync)
+SYM_CODE_END(elx_sync)
// nVHE? No way! Give me the real thing!
SYM_CODE_START_LOCAL(mutate_to_vhe)
@@ -224,7 +224,6 @@ SYM_FUNC_END(__hyp_reset_vectors)
* Entry point to switch to VHE if deemed capable
*/
SYM_FUNC_START(switch_to_vhe)
-#ifdef CONFIG_ARM64_VHE
// Need to have booted at EL2
adr_l x1, __boot_cpu_mode
ldr w0, [x1]
@@ -240,6 +239,5 @@ SYM_FUNC_START(switch_to_vhe)
mov x0, #HVC_VHE_RESTART
hvc #0
1:
-#endif
ret
SYM_FUNC_END(switch_to_vhe)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 83f1c4b92095..e628c8ce1ffe 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -25,14 +25,26 @@ struct ftr_set_desc {
struct {
char name[FTR_DESC_FIELD_LEN];
u8 shift;
+ bool (*filter)(u64 val);
} fields[];
};
+/*
+ * Filter callback for the id_aa64mmfr1 "vh" override field.
+ *
+ * Returns false, meaning the override is to be filtered out, when the
+ * kernel is running in hyp mode and the requested value @val is 0.
+ * Returns true for every other combination.
+ */
+static bool __init mmfr1_vh_filter(u64 val)
+{
+ /*
+ * If we ever reach this point while running VHE, we're
+ * guaranteed to be on one of these funky, VHE-stuck CPUs. If
+ * the user was trying to force nVHE on us, proceed with
+ * attitude adjustment.
+ */
+ return !(is_kernel_in_hyp_mode() && val == 0);
+}
+
static const struct ftr_set_desc mmfr1 __initconst = {
.name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override,
.fields = {
- { "vh", ID_AA64MMFR1_VHE_SHIFT },
+ { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter },
{}
},
};
@@ -124,6 +136,18 @@ static void __init match_options(const char *cmdline)
if (find_field(cmdline, regs[i], f, &v))
continue;
+ /*
+ * If an override gets filtered out, advertise
+ * it by setting the value to 0xf, but
+ * clearing the mask... Yes, this is fragile.
+ */
+ if (regs[i]->fields[f].filter &&
+ !regs[i]->fields[f].filter(v)) {
+ regs[i]->override->val |= mask;
+ regs[i]->override->mask &= ~mask;
+ continue;
+ }
+
regs[i]->override->val &= ~mask;
regs[i]->override->val |= (v << shift) & mask;
regs[i]->override->mask |= mask;
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index dfb1feab867d..bda49430c9ea 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -71,13 +71,44 @@ static void init_irq_stacks(void)
}
#endif
+/*
+ * Initial value of handle_arch_irq: panics, since reaching it means an IRQ
+ * was taken before set_handle_irq() installed a root handler.
+ */
+static void default_handle_irq(struct pt_regs *regs)
+{
+ panic("IRQ taken without a root IRQ handler\n");
+}
+
+/*
+ * Initial value of handle_arch_fiq: panics, since reaching it means an FIQ
+ * was taken before set_handle_fiq() installed a root handler.
+ */
+static void default_handle_fiq(struct pt_regs *regs)
+{
+ panic("FIQ taken without a root FIQ handler\n");
+}
+
+/*
+ * Root IRQ and FIQ handler pointers. Both start out at the panicking
+ * default handlers and are replaced through set_handle_irq() and
+ * set_handle_fiq() respectively.
+ */
+void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
+void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq;
+
+/*
+ * Install @handle_irq as the root IRQ handler and log it.
+ *
+ * Returns 0 on success, or -EBUSY if handle_arch_irq no longer points at
+ * default_handle_irq, i.e. a root handler has already been installed.
+ */
+int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
+{
+ if (handle_arch_irq != default_handle_irq)
+ return -EBUSY;
+
+ handle_arch_irq = handle_irq;
+ pr_info("Root IRQ handler: %ps\n", handle_irq);
+ return 0;
+}
+
+/*
+ * Install @handle_fiq as the root FIQ handler and log it.
+ *
+ * Returns 0 on success, or -EBUSY if handle_arch_fiq no longer points at
+ * default_handle_fiq, i.e. a root handler has already been installed.
+ */
+int __init set_handle_fiq(void (*handle_fiq)(struct pt_regs *))
+{
+ if (handle_arch_fiq != default_handle_fiq)
+ return -EBUSY;
+
+ handle_arch_fiq = handle_fiq;
+ pr_info("Root FIQ handler: %ps\n", handle_fiq);
+ return 0;
+}
+
void __init init_IRQ(void)
{
init_irq_stacks();
init_irq_scs();
irqchip_init();
- if (!handle_arch_irq)
- panic("No interrupt controller found.");
if (system_uses_irq_prio_masking()) {
/*
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 27f8939deb1b..341342b207f6 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -128,15 +128,17 @@ u64 __init kaslr_early_init(void)
/* use the top 16 bits to randomize the linear region */
memstart_offset_seed = seed >> 48;
- if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
- IS_ENABLED(CONFIG_KASAN_SW_TAGS))
+ if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) &&
+ (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+ IS_ENABLED(CONFIG_KASAN_SW_TAGS)))
/*
- * KASAN does not expect the module region to intersect the
- * vmalloc region, since shadow memory is allocated for each
- * module at load time, whereas the vmalloc region is shadowed
- * by KASAN zero pages. So keep modules out of the vmalloc
- * region if KASAN is enabled, and put the kernel well within
- * 4 GB of the module region.
+ * KASAN without KASAN_VMALLOC does not expect the module region
+ * to intersect the vmalloc region, since shadow memory is
+ * allocated for each module at load time, whereas the vmalloc
+ * region is shadowed by KASAN zero pages. So keep modules
+ * out of the vmalloc region if KASAN is enabled without
+ * KASAN_VMALLOC, and put the kernel well within 4 GB of the
+ * module region.
*/
return offset % SZ_2G;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index fe21e0f06492..b5ec010c481f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -40,14 +40,16 @@ void *module_alloc(unsigned long size)
NUMA_NO_NODE, __builtin_return_address(0));
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
- !IS_ENABLED(CONFIG_KASAN_GENERIC) &&
- !IS_ENABLED(CONFIG_KASAN_SW_TAGS))
+ (IS_ENABLED(CONFIG_KASAN_VMALLOC) ||
+ (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+ !IS_ENABLED(CONFIG_KASAN_SW_TAGS))))
/*
- * KASAN can only deal with module allocations being served
- * from the reserved module region, since the remainder of
- * the vmalloc region is already backed by zero shadow pages,
- * and punching holes into it is non-trivial. Since the module
- * region is not randomized when KASAN is enabled, it is even
+ * KASAN without KASAN_VMALLOC can only deal with module
+ * allocations being served from the reserved module region,
+ * since the remainder of the vmalloc region is already
+ * backed by zero shadow pages, and punching holes into it
+ * is non-trivial. Since the module region is not randomized
+ * when KASAN is enabled without KASAN_VMALLOC, it is even
* less likely that the module region gets exhausted, so we
* can simply omit this fallback in that case.
*/
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index b3c70a612c7a..125a10e413e9 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -26,6 +26,12 @@ u64 gcr_kernel_excl __ro_after_init;
static bool report_fault_once = true;
+#ifdef CONFIG_KASAN_HW_TAGS
+/* Whether the MTE asynchronous mode is enabled. */
+DEFINE_STATIC_KEY_FALSE(mte_async_mode);
+EXPORT_SYMBOL_GPL(mte_async_mode);
+#endif
+
static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
{
pte_t old_pte = READ_ONCE(*ptep);
@@ -107,13 +113,45 @@ void mte_init_tags(u64 max_tag)
write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
}
-void mte_enable_kernel(void)
+static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
{
/* Enable MTE Sync Mode for EL1. */
- sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_SYNC);
+ sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf);
isb();
+
+ pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * Program the SCTLR_ELx_TCF_MASK field of SCTLR_EL1 to SCTLR_ELx_TCF_SYNC
+ * through __mte_enable_kernel(), after warning (once) if async mode has
+ * already been flagged as in use system wide.
+ */
+void mte_enable_kernel_sync(void)
+{
+ /*
+ * Make sure we enter this function when no PE has set
+ * async mode previously.
+ */
+ WARN_ONCE(system_uses_mte_async_mode(),
+ "MTE async mode enabled system wide!");
+
+ __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
}
+/*
+ * Program the SCTLR_ELx_TCF_MASK field of SCTLR_EL1 to SCTLR_ELx_TCF_ASYNC
+ * through __mte_enable_kernel(), then enable the mte_async_mode static key
+ * unless system_uses_mte_async_mode() already reports async mode.
+ */
+void mte_enable_kernel_async(void)
+{
+ __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
+
+ /*
+ * MTE async mode is set system wide by the first PE that
+ * executes this function.
+ *
+ * Note: If in future KASAN acquires a runtime switching
+ * mode in between sync and async, this strategy needs
+ * to be reviewed.
+ */
+ if (!system_uses_mte_async_mode())
+ static_branch_enable(&mte_async_mode);
+}
+#endif
+
void mte_set_report_once(bool state)
{
WRITE_ONCE(report_fault_once, state);
@@ -124,25 +162,28 @@ bool mte_report_once(void)
return READ_ONCE(report_fault_once);
}
-static void update_sctlr_el1_tcf0(u64 tcf0)
+#ifdef CONFIG_KASAN_HW_TAGS
+void mte_check_tfsr_el1(void)
{
- /* ISB required for the kernel uaccess routines */
- sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
- isb();
-}
+ u64 tfsr_el1;
-static void set_sctlr_el1_tcf0(u64 tcf0)
-{
- /*
- * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
- * optimisation. Disable preemption so that it does not see
- * the variable update before the SCTLR_EL1.TCF0 one.
- */
- preempt_disable();
- current->thread.sctlr_tcf0 = tcf0;
- update_sctlr_el1_tcf0(tcf0);
- preempt_enable();
+ if (!system_supports_mte())
+ return;
+
+ tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);
+
+ if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
+ /*
+ * Note: isb() is not required after this direct write
+ * because there is no indirect read subsequent to it
+ * (per ARM DDI 0487F.c table D13-1).
+ */
+ write_sysreg_s(0, SYS_TFSR_EL1);
+
+ kasan_report_async();
+ }
}
+#endif
static void update_gcr_el1_excl(u64 excl)
{
@@ -166,7 +207,7 @@ static void set_gcr_el1_excl(u64 excl)
*/
}
-void flush_mte_state(void)
+void mte_thread_init_user(void)
{
if (!system_supports_mte())
return;
@@ -176,19 +217,39 @@ void flush_mte_state(void)
write_sysreg_s(0, SYS_TFSRE0_EL1);
clear_thread_flag(TIF_MTE_ASYNC_FAULT);
/* disable tag checking */
- set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+ set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
+ SCTLR_EL1_TCF0_NONE);
/* reset tag generation mask */
set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
}
void mte_thread_switch(struct task_struct *next)
{
+ /*
+ * Check if an async tag exception occurred at EL1.
+ *
+ * Note: On the context switch path we rely on the dsb() present
+ * in __switch_to() to guarantee that the indirect writes to TFSR_EL1
+ * are synchronized before this point.
+ */
+ isb();
+ mte_check_tfsr_el1();
+}
+
+void mte_suspend_enter(void)
+{
if (!system_supports_mte())
return;
- /* avoid expensive SCTLR_EL1 accesses if no change */
- if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
- update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+ /*
+ * The barriers are required to guarantee that the indirect writes
+ * to TFSR_EL1 are synchronized before we report the state.
+ */
+ dsb(nsh);
+ isb();
+
+ /* Report SYS_TFSR_EL1 before suspend entry */
+ mte_check_tfsr_el1();
}
void mte_suspend_exit(void)
@@ -201,7 +262,7 @@ void mte_suspend_exit(void)
long set_mte_ctrl(struct task_struct *task, unsigned long arg)
{
- u64 tcf0;
+ u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
SYS_GCR_EL1_EXCL_MASK;
@@ -210,23 +271,23 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
switch (arg & PR_MTE_TCF_MASK) {
case PR_MTE_TCF_NONE:
- tcf0 = SCTLR_EL1_TCF0_NONE;
+ sctlr |= SCTLR_EL1_TCF0_NONE;
break;
case PR_MTE_TCF_SYNC:
- tcf0 = SCTLR_EL1_TCF0_SYNC;
+ sctlr |= SCTLR_EL1_TCF0_SYNC;
break;
case PR_MTE_TCF_ASYNC:
- tcf0 = SCTLR_EL1_TCF0_ASYNC;
+ sctlr |= SCTLR_EL1_TCF0_ASYNC;
break;
default:
return -EINVAL;
}
if (task != current) {
- task->thread.sctlr_tcf0 = tcf0;
+ task->thread.sctlr_user = sctlr;
task->thread.gcr_user_excl = gcr_excl;
} else {
- set_sctlr_el1_tcf0(tcf0);
+ set_task_sctlr_el1(sctlr);
set_gcr_el1_excl(gcr_excl);
}
@@ -243,7 +304,7 @@ long get_mte_ctrl(struct task_struct *task)
ret = incl << PR_MTE_TAG_SHIFT;
- switch (task->thread.sctlr_tcf0) {
+ switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) {
case SCTLR_EL1_TCF0_NONE:
ret |= PR_MTE_TCF_NONE;
break;
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index c07d7a034941..75fed4460407 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -18,6 +18,7 @@
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/static_call.h>
#include <asm/paravirt.h>
#include <asm/pvclock-abi.h>
@@ -26,8 +27,12 @@
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
-struct paravirt_patch_template pv_ops;
-EXPORT_SYMBOL_GPL(pv_ops);
+/*
+ * Default target of the pv_steal_clock static call: reports zero stolen
+ * time for any @cpu. pv_time_init() retargets the static call to
+ * para_steal_clock() when paravirtualised stolen time is set up.
+ */
+static u64 native_steal_clock(int cpu)
+{
+ return 0;
+}
+
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
struct pv_time_stolen_time_region {
struct pvclock_vcpu_stolen_time *kaddr;
@@ -45,7 +50,7 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
/* return stolen time in ns by asking the hypervisor */
-static u64 pv_steal_clock(int cpu)
+static u64 para_steal_clock(int cpu)
{
struct pv_time_stolen_time_region *reg;
@@ -150,7 +155,7 @@ int __init pv_time_init(void)
if (ret)
return ret;
- pv_ops.time.steal_clock = pv_steal_clock;
+ static_call_update(pv_steal_clock, para_steal_clock);
static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4658fcf88c2b..f594957e29bd 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -470,9 +470,8 @@ static inline u64 armv8pmu_read_evcntr(int idx)
static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
{
int idx = event->hw.idx;
- u64 val = 0;
+ u64 val = armv8pmu_read_evcntr(idx);
- val = armv8pmu_read_evcntr(idx);
if (armv8pmu_event_is_chained(event))
val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
return val;
@@ -520,7 +519,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- u64 value = 0;
+ u64 value;
if (idx == ARMV8_IDX_CYCLE_COUNTER)
value = read_sysreg(pmccntr_el0);
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index adb955fd9bdd..60901ab0a7fe 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -43,6 +43,69 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
get_random_bytes(&keys->apdb, sizeof(keys->apdb));
if (arg & PR_PAC_APGAKEY)
get_random_bytes(&keys->apga, sizeof(keys->apga));
+ ptrauth_keys_install_user(keys);
return 0;
}
+
+/*
+ * Translate a bitmask of PR_PAC_AP{IA,IB,DA,DB}KEY flags into the
+ * corresponding SCTLR_ELx_EN{IA,IB,DA,DB} bits.
+ *
+ * Warns if @arg has bits set outside PR_PAC_ENABLED_KEYS_MASK; such bits
+ * do not contribute to the returned mask.
+ */
+static u64 arg_to_enxx_mask(unsigned long arg)
+{
+ u64 sctlr_enxx_mask = 0;
+
+ WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK);
+ if (arg & PR_PAC_APIAKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENIA;
+ if (arg & PR_PAC_APIBKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENIB;
+ if (arg & PR_PAC_APDAKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENDA;
+ if (arg & PR_PAC_APDBKEY)
+ sctlr_enxx_mask |= SCTLR_ELx_ENDB;
+ return sctlr_enxx_mask;
+}
+
+/*
+ * Change which pointer authentication keys are enabled for @tsk.
+ *
+ * @keys: PR_PAC_AP*KEY mask of the keys whose state is being changed.
+ * @enabled: subset of @keys that should end up enabled; the remaining
+ * keys in @keys end up disabled.
+ *
+ * Keys not named in @keys keep their current state. For the current task
+ * the new value goes through set_task_sctlr_el1(); for any other task only
+ * the saved thread.sctlr_user is updated.
+ *
+ * Returns 0 on success, or -EINVAL if address authentication is not
+ * supported, @tsk is a compat thread, @keys has bits outside
+ * PR_PAC_ENABLED_KEYS_MASK, or @enabled is not a subset of @keys.
+ */
+int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+ unsigned long enabled)
+{
+ u64 sctlr = tsk->thread.sctlr_user;
+
+ if (!system_supports_address_auth())
+ return -EINVAL;
+
+ if (is_compat_thread(task_thread_info(tsk)))
+ return -EINVAL;
+
+ if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
+ return -EINVAL;
+
+ sctlr &= ~arg_to_enxx_mask(keys);
+ sctlr |= arg_to_enxx_mask(enabled);
+ if (tsk == current)
+ set_task_sctlr_el1(sctlr);
+ else
+ tsk->thread.sctlr_user = sctlr;
+
+ return 0;
+}
+
+/*
+ * Report which pointer authentication keys are enabled for @tsk.
+ *
+ * Returns a mask of PR_PAC_AP{IA,IB,DA,DB}KEY flags, one for each
+ * SCTLR_ELx_EN* bit set in tsk->thread.sctlr_user, or -EINVAL if address
+ * authentication is not supported or @tsk is a compat thread.
+ */
+int ptrauth_get_enabled_keys(struct task_struct *tsk)
+{
+ int retval = 0;
+
+ if (!system_supports_address_auth())
+ return -EINVAL;
+
+ if (is_compat_thread(task_thread_info(tsk)))
+ return -EINVAL;
+
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA)
+ retval |= PR_PAC_APIAKEY;
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENIB)
+ retval |= PR_PAC_APIBKEY;
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENDA)
+ retval |= PR_PAC_APDAKEY;
+ if (tsk->thread.sctlr_user & SCTLR_ELx_ENDB)
+ retval |= PR_PAC_APDBKEY;
+
+ return retval;
+}
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 85645b2b0c7a..d607c9912025 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -264,8 +264,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
* normal page fault.
*/
instruction_pointer_set(regs, (unsigned long) cur->addr);
- if (!instruction_pointer(regs))
- BUG();
+ BUG_ON(!instruction_pointer(regs));
if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(kcb);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6e60aa3b5ea9..cbf52109583b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -86,7 +86,7 @@ static void noinstr __cpu_do_idle_irqprio(void)
unsigned long daif_bits;
daif_bits = read_sysreg(daif);
- write_sysreg(daif_bits | PSR_I_BIT, daif);
+ write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif);
/*
* Unmask PMR before going idle to make sure interrupts can
@@ -341,7 +341,6 @@ void flush_thread(void)
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
- flush_mte_state();
}
void release_thread(struct task_struct *dead_task)
@@ -531,6 +530,31 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
write_sysreg(val, cntkctl_el1);
}
+/*
+ * Apply @sctlr to SCTLR_EL1: the SCTLR_USER_MASK bits other than EnIA are
+ * cleared and the bits of @sctlr are then set (via sysreg_clear_set()),
+ * followed by an ISB.
+ */
+static void update_sctlr_el1(u64 sctlr)
+{
+ /*
+ * EnIA must not be cleared while in the kernel as this is necessary for
+ * in-kernel PAC. It will be cleared on kernel exit if needed.
+ */
+ sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
+
+ /* ISB required for the kernel uaccess routines when setting TCF0. */
+ isb();
+}
+
+/*
+ * Record @sctlr as the current task's thread.sctlr_user and apply it to
+ * SCTLR_EL1 through update_sctlr_el1(), with preemption disabled across
+ * both steps.
+ */
+void set_task_sctlr_el1(u64 sctlr)
+{
+ /*
+ * __switch_to() checks current->thread.sctlr_user as an
+ * optimisation. Disable preemption so that it does not see
+ * the variable update before the SCTLR_EL1 one.
+ */
+ preempt_disable();
+ current->thread.sctlr_user = sctlr;
+ update_sctlr_el1(sctlr);
+ preempt_enable();
+}
+
/*
* Thread switching.
*/
@@ -546,6 +570,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
entry_task_switch(next);
ssbs_thread_switch(next);
erratum_1418040_thread_switch(prev, next);
+ ptrauth_thread_switch_user(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -561,6 +586,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
* registers.
*/
mte_thread_switch(next);
+ /* avoid expensive SCTLR_EL1 accesses if no change */
+ if (prev->thread.sctlr_user != next->thread.sctlr_user)
+ update_sctlr_el1(next->thread.sctlr_user);
/* the actual thread switch */
last = cpu_switch_to(prev, next);
@@ -610,7 +638,8 @@ void arch_setup_new_exec(void)
{
current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
- ptrauth_thread_init_user(current);
+ ptrauth_thread_init_user();
+ mte_thread_init_user();
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 170f42fd6101..eb2f73939b7b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -909,6 +909,38 @@ static int pac_mask_get(struct task_struct *target,
return membuf_write(&to, &uregs, sizeof(uregs));
}
+/*
+ * Regset getter for REGSET_PAC_ENABLED_KEYS: writes @target's enabled-keys
+ * mask to @to as a single long. If ptrauth_get_enabled_keys() fails, its
+ * error code is returned and nothing is written.
+ */
+static int pac_enabled_keys_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ long enabled_keys = ptrauth_get_enabled_keys(target);
+
+ if (IS_ERR_VALUE(enabled_keys))
+ return enabled_keys;
+
+ return membuf_write(&to, &enabled_keys, sizeof(enabled_keys));
+}
+
+/*
+ * Regset setter for REGSET_PAC_ENABLED_KEYS: starts from @target's current
+ * enabled-keys mask, overlays the long copied in from @kbuf/@ubuf, and
+ * applies the result to every key in PR_PAC_ENABLED_KEYS_MASK.
+ *
+ * Returns the error from ptrauth_get_enabled_keys() or
+ * user_regset_copyin() if either fails, otherwise the result of
+ * ptrauth_set_enabled_keys().
+ */
+static int pac_enabled_keys_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ long enabled_keys = ptrauth_get_enabled_keys(target);
+
+ if (IS_ERR_VALUE(enabled_keys))
+ return enabled_keys;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &enabled_keys, 0,
+ sizeof(long));
+ if (ret)
+ return ret;
+
+ return ptrauth_set_enabled_keys(target, PR_PAC_ENABLED_KEYS_MASK,
+ enabled_keys);
+}
+
#ifdef CONFIG_CHECKPOINT_RESTORE
static __uint128_t pac_key_to_user(const struct ptrauth_key *key)
{
@@ -1074,6 +1106,7 @@ enum aarch64_regset {
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
REGSET_PAC_MASK,
+ REGSET_PAC_ENABLED_KEYS,
#ifdef CONFIG_CHECKPOINT_RESTORE
REGSET_PACA_KEYS,
REGSET_PACG_KEYS,
@@ -1160,6 +1193,14 @@ static const struct user_regset aarch64_regsets[] = {
.regset_get = pac_mask_get,
/* this cannot be set dynamically */
},
+ [REGSET_PAC_ENABLED_KEYS] = {
+ .core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = pac_enabled_keys_get,
+ .set = pac_enabled_keys_set,
+ },
#ifdef CONFIG_CHECKPOINT_RESTORE
[REGSET_PACA_KEYS] = {
.core_note_type = NT_ARM_PACA_KEYS,
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 357590beaabb..dcd7041b2b07 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -188,6 +188,7 @@ static void init_gic_priority_masking(void)
cpuflags = read_sysreg(daif);
WARN_ON(!(cpuflags & PSR_I_BIT));
+ WARN_ON(!(cpuflags & PSR_F_BIT));
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
}
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d55bdfb7789c..84b676bcf867 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -32,6 +32,30 @@
* add sp, sp, #0x10
*/
+
+/*
+ * Initialise @frame so that unwinding starts from frame pointer @fp and
+ * program counter @pc, with no stacks marked as done and no previous
+ * frame recorded.
+ */
+void start_backtrace(struct stackframe *frame, unsigned long fp,
+ unsigned long pc)
+{
+ frame->fp = fp;
+ frame->pc = pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ frame->graph = 0;
+#endif
+
+ /*
+ * Prime the first unwind.
+ *
+ * In unwind_frame() we'll check that the FP points to a valid stack,
+ * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+ * treated as a transition to whichever stack that happens to be. The
+ * prev_fp value won't be used, but we set it to 0 such that it is
+ * definitely not an accessible stack address.
+ */
+ bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
+ frame->prev_fp = 0;
+ frame->prev_type = STACK_TYPE_UNKNOWN;
+}
+
/*
* Unwind from one frame record (A) to the next frame record (B).
*
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index d7564891ffe1..e3f72df9509d 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -74,8 +74,9 @@ void notrace __cpu_suspend_exit(void)
*/
spectre_v4_enable_mitigation(NULL);
- /* Restore additional MTE-specific configuration */
+ /* Restore additional feature-specific configuration */
mte_suspend_exit();
+ ptrauth_suspend_exit();
}
/*
@@ -91,6 +92,9 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
unsigned long flags;
struct sleep_stack_data state;
+ /* Report any MTE async fault before going to suspend */
+ mte_suspend_enter();
+
/*
* From this point debug exceptions are disabled to prevent
* updates to mdscr register (saved and restored along with
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index b9cf12b271d7..263d6c1a525f 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -5,6 +5,7 @@
#include <linux/errno.h>
#include <linux/nospec.h>
#include <linux/ptrace.h>
+#include <linux/randomize_kstack.h>
#include <linux/syscalls.h>
#include <asm/daifflags.h>
@@ -43,6 +44,8 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
{
long ret;
+ add_random_kstack_offset();
+
if (scno < sc_nr) {
syscall_fn_t syscall_fn;
syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
@@ -55,6 +58,19 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
ret = lower_32_bits(ret);
regs->regs[0] = ret;
+
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * but not enough for arm64 stack utilization comfort. To keep
+ * reasonable stack head room, reduce the maximum offset to 9 bits.
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints: the AAPCS mandates a
+ * 16-byte (i.e. 4-bit) aligned SP at function boundaries.
+ *
+ * The resulting 5 bits of entropy is seen in SP[8:4].
+ */
+ choose_random_kstack_offset(get_random_int() & 0x1FF);
}
static inline bool has_syscall_work(unsigned long flags)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index cee5d04ea9ad..a61fc4f989b3 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -86,7 +86,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
-static int __vdso_init(enum vdso_abi abi)
+static int __init __vdso_init(enum vdso_abi abi)
{
int i;
struct page **vdso_pagelist;
@@ -271,6 +271,14 @@ enum aarch32_map {
static struct page *aarch32_vectors_page __ro_after_init;
static struct page *aarch32_sig_page __ro_after_init;
+/*
+ * mremap hook for the "[sigpage]" special mapping: records the start
+ * address of the new VMA in current->mm->context.sigpage. Always
+ * returns 0.
+ */
+static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ current->mm->context.sigpage = (void *)new_vma->vm_start;
+
+ return 0;
+}
+
static struct vm_special_mapping aarch32_vdso_maps[] = {
[AA32_MAP_VECTORS] = {
.name = "[vectors]", /* ABI */
@@ -279,6 +287,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
[AA32_MAP_SIGPAGE] = {
.name = "[sigpage]", /* ABI */
.pages = &aarch32_sig_page,
+ .mremap = aarch32_sigpage_mremap,
},
[AA32_MAP_VVAR] = {
.name = "[vvar]",
@@ -299,34 +308,35 @@ static int aarch32_alloc_kuser_vdso_page(void)
if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
return 0;
- vdso_page = get_zeroed_page(GFP_ATOMIC);
+ vdso_page = get_zeroed_page(GFP_KERNEL);
if (!vdso_page)
return -ENOMEM;
memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
kuser_sz);
aarch32_vectors_page = virt_to_page(vdso_page);
- flush_dcache_page(aarch32_vectors_page);
return 0;
}
+#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1
static int aarch32_alloc_sigpage(void)
{
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
- unsigned long sigpage;
+ __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD);
+ void *sigpage;
- sigpage = get_zeroed_page(GFP_ATOMIC);
+ sigpage = (void *)__get_free_page(GFP_KERNEL);
if (!sigpage)
return -ENOMEM;
- memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
+ memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison));
+ memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz);
aarch32_sig_page = virt_to_page(sigpage);
- flush_dcache_page(aarch32_sig_page);
return 0;
}
-static int __aarch32_alloc_vdso_pages(void)
+static int __init __aarch32_alloc_vdso_pages(void)
{
if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 15a6c98ee92f..2f1b156021a6 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -86,7 +86,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
}
break;
case GICD_TYPER2:
- if (kvm_vgic_global_state.has_gicv4_1)
+ if (kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi())
value = GICD_TYPER2_nASSGIcap;
break;
case GICD_IIDR:
@@ -119,7 +119,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
/* Not a GICv4.1? No HW SGIs */
- if (!kvm_vgic_global_state.has_gicv4_1)
+ if (!kvm_vgic_global_state.has_gicv4_1 || !gic_cpuif_has_vsgi())
val &= ~GICD_CTLR_nASSGIreq;
/* Dist stays enabled? nASSGIreq is RO */
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f37d4e3830b7..871c82ab0a30 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -527,7 +527,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
const struct fault_info *inf;
struct mm_struct *mm = current->mm;
vm_fault_t fault;
- unsigned long vm_flags = VM_ACCESS_FLAGS;
+ unsigned long vm_flags;
unsigned int mm_flags = FAULT_FLAG_DEFAULT;
unsigned long addr = untagged_addr(far);
@@ -544,12 +544,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
if (user_mode(regs))
mm_flags |= FAULT_FLAG_USER;
+ /*
+ * vm_flags tells us which bits must be present in vma->vm_flags
+ * for the fault to be benign: __do_page_fault() checks
+ * vma->vm_flags & vm_flags and returns an error if the
+ * intersection is empty.
+ */
if (is_el0_instruction_abort(esr)) {
+ /* It was exec fault */
vm_flags = VM_EXEC;
mm_flags |= FAULT_FLAG_INSTRUCTION;
} else if (is_write_abort(esr)) {
+ /* It was write fault */
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
+ } else {
+ /* It was read fault */
+ vm_flags = VM_READ;
+ /* Write implies read */
+ vm_flags |= VM_WRITE;
+ /* If EPAN is absent then exec implies read */
+ if (!cpus_have_const_cap(ARM64_HAS_EPAN))
+ vm_flags |= VM_EXEC;
}
if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index d8e66c78440e..61b52a92b8b6 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -79,7 +79,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
phys_addr_t pmd_phys = early ?
__pa_symbol(kasan_early_shadow_pmd)
: kasan_alloc_zeroed_page(node);
- __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
}
return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
@@ -92,7 +92,7 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
phys_addr_t pud_phys = early ?
__pa_symbol(kasan_early_shadow_pud)
: kasan_alloc_zeroed_page(node);
- __p4d_populate(p4dp, pud_phys, PMD_TYPE_TABLE);
+ __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE);
}
return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr);
@@ -214,15 +214,18 @@ static void __init kasan_init_shadow(void)
{
u64 kimg_shadow_start, kimg_shadow_end;
u64 mod_shadow_start, mod_shadow_end;
+ u64 vmalloc_shadow_end;
phys_addr_t pa_start, pa_end;
u64 i;
- kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK;
- kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end));
+ kimg_shadow_start = (u64)kasan_mem_to_shadow(KERNEL_START) & PAGE_MASK;
+ kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END));
mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
+ vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END);
+
/*
* We are going to perform proper setup of shadow memory.
* At first we should unmap early shadow (clear_pgds() call below).
@@ -237,16 +240,22 @@ static void __init kasan_init_shadow(void)
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
- early_pfn_to_nid(virt_to_pfn(lm_alias(_text))));
+ early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START))));
kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END),
(void *)mod_shadow_start);
- kasan_populate_early_shadow((void *)kimg_shadow_end,
- (void *)KASAN_SHADOW_END);
- if (kimg_shadow_start > mod_shadow_end)
- kasan_populate_early_shadow((void *)mod_shadow_end,
- (void *)kimg_shadow_start);
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ BUILD_BUG_ON(VMALLOC_START != MODULES_END);
+ kasan_populate_early_shadow((void *)vmalloc_shadow_end,
+ (void *)KASAN_SHADOW_END);
+ } else {
+ kasan_populate_early_shadow((void *)kimg_shadow_end,
+ (void *)KASAN_SHADOW_END);
+ if (kimg_shadow_start > mod_shadow_end)
+ kasan_populate_early_shadow((void *)mod_shadow_end,
+ (void *)kimg_shadow_start);
+ }
for_each_mem_range(i, &pa_start, &pa_end) {
void *start = (void *)__phys_to_virt(pa_start);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5d9550fdb9cf..d563335ad43f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -39,6 +39,7 @@
#define NO_BLOCK_MAPPINGS BIT(0)
#define NO_CONT_MAPPINGS BIT(1)
+#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
@@ -185,10 +186,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
BUG_ON(pmd_sect(pmd));
if (pmd_none(pmd)) {
+ pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN;
phys_addr_t pte_phys;
+
+ if (flags & NO_EXEC_MAPPINGS)
+ pmdval |= PMD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc(PAGE_SHIFT);
- __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+ __pmd_populate(pmdp, pte_phys, pmdval);
pmd = READ_ONCE(*pmdp);
}
BUG_ON(pmd_bad(pmd));
@@ -259,10 +264,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
*/
BUG_ON(pud_sect(pud));
if (pud_none(pud)) {
+ pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN;
phys_addr_t pmd_phys;
+
+ if (flags & NO_EXEC_MAPPINGS)
+ pudval |= PUD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc(PMD_SHIFT);
- __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+ __pud_populate(pudp, pmd_phys, pudval);
pud = READ_ONCE(*pudp);
}
BUG_ON(pud_bad(pud));
@@ -306,10 +315,14 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
p4d_t p4d = READ_ONCE(*p4dp);
if (p4d_none(p4d)) {
+ p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
phys_addr_t pud_phys;
+
+ if (flags & NO_EXEC_MAPPINGS)
+ p4dval |= P4D_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc(PUD_SHIFT);
- __p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE);
+ __p4d_populate(p4dp, pud_phys, p4dval);
p4d = READ_ONCE(*p4dp);
}
BUG_ON(p4d_bad(p4d));
@@ -486,14 +499,24 @@ early_param("crashkernel", enable_crash_mem_map);
static void __init map_mem(pgd_t *pgdp)
{
+ static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
phys_addr_t kernel_start = __pa_symbol(_stext);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
phys_addr_t start, end;
- int flags = 0;
+ int flags = NO_EXEC_MAPPINGS;
u64 i;
+ /*
+ * Setting hierarchical PXNTable attributes on table entries covering
+ * the linear region is only possible if it is guaranteed that no table
+ * entries at any level are being shared between the linear region and
+ * the vmalloc region. Check whether this is true for the PGD level, in
+ * which case it is guaranteed to be true for all other levels as well.
+ */
+ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
+
if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
- flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
* Take care not to create a writable alias for the
@@ -1210,11 +1233,11 @@ void __init early_fixmap_init(void)
pudp = pud_offset_kimg(p4dp, addr);
} else {
if (p4d_none(p4d))
- __p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+ __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE);
pudp = fixmap_pud(addr);
}
if (pud_none(READ_ONCE(*pudp)))
- __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+ __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE);
pmdp = fixmap_pmd(addr);
__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
@@ -1480,7 +1503,7 @@ struct range arch_get_mappable_range(void)
int arch_add_memory(int nid, u64 start, u64 size,
struct mhp_params *params)
{
- int ret, flags = 0;
+ int ret, flags = NO_EXEC_MAPPINGS;
VM_BUG_ON(!mhp_range_allowed(start, size, true));
@@ -1490,7 +1513,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
*/
if (rodata_full || debug_pagealloc_enabled() ||
IS_ENABLED(CONFIG_KFENCE))
- flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+ flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
size, params->pgprot, __pgd_pgtable_alloc,
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c967bfd30d2b..0a48191534ff 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -419,14 +419,17 @@ SYM_FUNC_START(__cpu_setup)
reset_amuserenr_el0 x1 // Disable AMU access from EL0
/*
- * Memory region attributes
+ * Default values for VMSA control registers. These will be adjusted
+ * below depending on detected CPU features.
*/
- mov_q x5, MAIR_EL1_SET
-#ifdef CONFIG_ARM64_MTE
- mte_tcr .req x20
-
- mov mte_tcr, #0
+ mair .req x17
+ tcr .req x16
+ mov_q mair, MAIR_EL1_SET
+ mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
+ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
+ TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
+#ifdef CONFIG_ARM64_MTE
/*
* Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
* (ID_AA64PFR1_EL1[11:8] > 1).
@@ -438,7 +441,7 @@ SYM_FUNC_START(__cpu_setup)
/* Normal Tagged memory type at the corresponding MAIR index */
mov x10, #MAIR_ATTR_NORMAL_TAGGED
- bfi x5, x10, #(8 * MT_NORMAL_TAGGED), #8
+ bfi mair, x10, #(8 * MT_NORMAL_TAGGED), #8
/* initialize GCR_EL1: all non-zero tags excluded by default */
mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
@@ -449,37 +452,26 @@ SYM_FUNC_START(__cpu_setup)
msr_s SYS_TFSRE0_EL1, xzr
/* set the TCR_EL1 bits */
- mov_q mte_tcr, TCR_KASAN_HW_FLAGS
+ mov_q x10, TCR_KASAN_HW_FLAGS
+ orr tcr, tcr, x10
1:
#endif
- msr mair_el1, x5
- /*
- * Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further
- * adjusted if the kernel is compiled with 52bit VA support.
- */
- mov_q x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
- TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
- TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
-#ifdef CONFIG_ARM64_MTE
- orr x10, x10, mte_tcr
- .unreq mte_tcr
-#endif
- tcr_clear_errata_bits x10, x9, x5
+ tcr_clear_errata_bits tcr, x9, x5
#ifdef CONFIG_ARM64_VA_BITS_52
ldr_l x9, vabits_actual
sub x9, xzr, x9
add x9, x9, #64
- tcr_set_t1sz x10, x9
+ tcr_set_t1sz tcr, x9
#else
ldr_l x9, idmap_t0sz
#endif
- tcr_set_t0sz x10, x9
+ tcr_set_t0sz tcr, x9
/*
* Set the IPS bits in TCR_EL1.
*/
- tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
+ tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Enable hardware update of the Access Flags bit.
@@ -489,13 +481,17 @@ SYM_FUNC_START(__cpu_setup)
mrs x9, ID_AA64MMFR1_EL1
and x9, x9, #0xf
cbz x9, 1f
- orr x10, x10, #TCR_HA // hardware Access flag update
+ orr tcr, tcr, #TCR_HA // hardware Access flag update
1:
#endif /* CONFIG_ARM64_HW_AFDBM */
- msr tcr_el1, x10
+ msr mair_el1, mair
+ msr tcr_el1, tcr
/*
* Prepare SCTLR
*/
mov_q x0, INIT_SCTLR_EL1_MMU_ON
ret // return to head.S
+
+ .unreq mair
+ .unreq tcr
SYM_FUNC_END(__cpu_setup)
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 0e050d76b83a..a50e92ea1878 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -337,7 +337,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
ptdump_walk_pgd(&st.ptdump, info->mm, NULL);
}
-static void ptdump_initialize(void)
+static void __init ptdump_initialize(void)
{
unsigned i, j;
@@ -381,7 +381,7 @@ void ptdump_check_wx(void)
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}
-static int ptdump_init(void)
+static int __init ptdump_init(void)
{
address_markers[PAGE_END_NR].start_address = PAGE_END;
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index d29d722ec3ec..68bf1a125502 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -16,7 +16,7 @@ static int ptdump_show(struct seq_file *m, void *v)
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
-void ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
{
debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
}
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 786656090c50..59b727b69357 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -580,12 +580,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -598,7 +594,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 9bb12be4a38e..8d4ddcebe7b8 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -536,12 +536,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -554,7 +550,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 413232626d9d..9cc9f1a06516 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -558,12 +558,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -576,7 +572,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 819cc70b06d8..c3f3f462e6ce 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -529,12 +529,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -547,7 +543,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 8f8d5968713b..8c908fc5c191 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -538,12 +538,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -556,7 +552,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index bf15e6c1c939..4e68b72d9c50 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -561,12 +561,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -579,7 +575,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 5466d48fcd9d..d31896293c39 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -647,12 +647,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -665,7 +661,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 93c305918838..c7442f9dd469 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -528,12 +528,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -546,7 +542,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index cacd6c617f69..233b82ea103a 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -529,12 +529,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -547,7 +543,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 3ae421cb24a4..664025a0f6a4 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -547,12 +547,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -565,7 +561,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 6da97e28c48e..73293a0b3dc8 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -531,12 +531,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -549,7 +545,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index f54481bb789a..bca8a6f3e92f 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -530,12 +530,8 @@ CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_AES_TI=m
@@ -548,7 +544,6 @@ CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
diff --git a/arch/m68k/fpsp040/Makefile b/arch/m68k/fpsp040/Makefile
index aab04d372ae7..834ae9471b88 100644
--- a/arch/m68k/fpsp040/Makefile
+++ b/arch/m68k/fpsp040/Makefile
@@ -10,7 +10,3 @@ obj-y := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \
ssin.o ssinh.o stan.o stanh.o sto_res.o stwotox.o tbldo.o util.o \
x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \
x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o
-
-EXTRA_LDFLAGS := -x
-
-$(OS_OBJS): fpsp.h
diff --git a/arch/m68k/ifpsp060/Makefile b/arch/m68k/ifpsp060/Makefile
index 43b435049452..56b530a96c2f 100644
--- a/arch/m68k/ifpsp060/Makefile
+++ b/arch/m68k/ifpsp060/Makefile
@@ -5,5 +5,3 @@
# for more details.
obj-y := fskeleton.o iskeleton.o os.o
-
-EXTRA_LDFLAGS := -x
diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h
index 257b29184af9..e28eb1c0e0bf 100644
--- a/arch/m68k/include/asm/mvme147hw.h
+++ b/arch/m68k/include/asm/mvme147hw.h
@@ -66,6 +66,9 @@ struct pcc_regs {
#define PCC_INT_ENAB 0x08
#define PCC_TIMER_INT_CLR 0x80
+
+#define PCC_TIMER_TIC_EN 0x01
+#define PCC_TIMER_COC_EN 0x02
#define PCC_TIMER_CLR_OVF 0x04
#define PCC_LEVEL_ABORT 0x07
diff --git a/arch/m68k/include/asm/sun3xflop.h b/arch/m68k/include/asm/sun3xflop.h
index 93f2a8431c0e..bce8aabb5380 100644
--- a/arch/m68k/include/asm/sun3xflop.h
+++ b/arch/m68k/include/asm/sun3xflop.h
@@ -106,7 +106,7 @@ static void sun3x_82072_fd_outb(unsigned char value, int port)
case 4: /* FD_STATUS */
*(sun3x_fdc.status_r) = value;
break;
- };
+ }
return;
}
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 1c235d8f53f3..f55bdcb8e4f1 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -388,6 +388,8 @@ sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out;
+
+ mmap_read_lock(current->mm);
} else {
struct vm_area_struct *vma;
diff --git a/arch/m68k/kernel/syscalls/Makefile b/arch/m68k/kernel/syscalls/Makefile
index 285aaba832d9..6713c65a25e1 100644
--- a/arch/m68k/kernel/syscalls/Makefile
+++ b/arch/m68k/kernel/syscalls/Makefile
@@ -6,20 +6,14 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \
$(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
syscall := $(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
quiet_cmd_syshdr = SYSHDR $@
- cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
- '$(syshdr_abis_$(basetarget))' \
- '$(syshdr_pfx_$(basetarget))' \
- '$(syshdr_offset_$(basetarget))'
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr $< $@
quiet_cmd_systbl = SYSTBL $@
- cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
- '$(systbl_abis_$(basetarget))' \
- '$(systbl_abi_$(basetarget))' \
- '$(systbl_offset_$(basetarget))'
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) $< $@
$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
diff --git a/arch/m68k/kernel/syscalls/syscallhdr.sh b/arch/m68k/kernel/syscalls/syscallhdr.sh
deleted file mode 100644
index 6f357d68ef44..000000000000
--- a/arch/m68k/kernel/syscalls/syscallhdr.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_M68K_`basename "$out" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- printf "#ifndef %s\n" "${fileguard}"
- printf "#define %s\n" "${fileguard}"
- printf "\n"
-
- nxt=0
- while read nr abi name entry ; do
- if [ -z "$offset" ]; then
- printf "#define __NR_%s%s\t%s\n" \
- "${prefix}" "${name}" "${nr}"
- else
- printf "#define __NR_%s%s\t(%s + %s)\n" \
- "${prefix}" "${name}" "${offset}" "${nr}"
- fi
- nxt=$((nr+1))
- done
-
- printf "\n"
- printf "#ifdef __KERNEL__\n"
- printf "#define __NR_syscalls\t%s\n" "${nxt}"
- printf "#endif\n"
- printf "\n"
- printf "#endif /* %s */\n" "${fileguard}"
-) > "$out"
diff --git a/arch/m68k/kernel/syscalls/syscalltbl.sh b/arch/m68k/kernel/syscalls/syscalltbl.sh
deleted file mode 100644
index 85d78d9309ad..000000000000
--- a/arch/m68k/kernel/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
- t_nxt="$1"
- t_nr="$2"
- t_entry="$3"
-
- while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s, sys_ni_syscall, )\n" "${t_nxt}"
- t_nxt=$((t_nxt+1))
- done
- printf "__SYSCALL(%s, %s, )\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- nxt=0
- if [ -z "$offset" ]; then
- offset=0
- fi
-
- while read nr abi name entry ; do
- emit $((nxt+offset)) $((nr+offset)) $entry
- nxt=$((nr+1))
- done
-) > "$out"
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index d329cc7b481c..e25ef4a9df30 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -18,9 +18,8 @@
#define sys_mmap2 sys_mmap_pgoff
#endif
-#define __SYSCALL(nr, entry, nargs) .long entry
+#define __SYSCALL(nr, entry) .long entry
.section .rodata
ALIGN
ENTRY(sys_call_table)
#include <asm/syscall_table.h>
-#undef __SYSCALL
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index cfdc7f912e14..e1e90c49a496 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -114,8 +114,10 @@ static irqreturn_t mvme147_timer_int (int irq, void *dev_id)
unsigned long flags;
local_irq_save(flags);
- m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR;
- m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF;
+ m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF | PCC_TIMER_COC_EN |
+ PCC_TIMER_TIC_EN;
+ m147_pcc->t1_int_cntrl = PCC_INT_ENAB | PCC_TIMER_INT_CLR |
+ PCC_LEVEL_TIMER1;
clk_total += PCC_TIMER_CYCLES;
legacy_timer_tick(1);
local_irq_restore(flags);
@@ -133,10 +135,10 @@ void mvme147_sched_init (void)
/* Init the clock with a value */
/* The clock counter increments until 0xFFFF then reloads */
m147_pcc->t1_preload = PCC_TIMER_PRELOAD;
- m147_pcc->t1_cntrl = 0x0; /* clear timer */
- m147_pcc->t1_cntrl = 0x3; /* start timer */
- m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; /* clear pending ints */
- m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1;
+ m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF | PCC_TIMER_COC_EN |
+ PCC_TIMER_TIC_EN;
+ m147_pcc->t1_int_cntrl = PCC_INT_ENAB | PCC_TIMER_INT_CLR |
+ PCC_LEVEL_TIMER1;
clocksource_register_hz(&mvme147_clk, PCC_TIMER_CLOCK_FREQ);
}
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 30357fe4ba6c..b59593c7cfb9 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -366,6 +366,7 @@ static u32 clk_total;
#define PCCTOVR1_COC_EN 0x02
#define PCCTOVR1_OVR_CLR 0x04
+#define PCCTIC1_INT_LEVEL 6
#define PCCTIC1_INT_CLR 0x08
#define PCCTIC1_INT_EN 0x10
@@ -374,8 +375,8 @@ static irqreturn_t mvme16x_timer_int (int irq, void *dev_id)
unsigned long flags;
local_irq_save(flags);
- out_8(PCCTIC1, in_8(PCCTIC1) | PCCTIC1_INT_CLR);
- out_8(PCCTOVR1, PCCTOVR1_OVR_CLR);
+ out_8(PCCTOVR1, PCCTOVR1_OVR_CLR | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN);
+ out_8(PCCTIC1, PCCTIC1_INT_EN | PCCTIC1_INT_CLR | PCCTIC1_INT_LEVEL);
clk_total += PCC_TIMER_CYCLES;
legacy_timer_tick(1);
local_irq_restore(flags);
@@ -389,14 +390,15 @@ void mvme16x_sched_init(void)
int irq;
/* Using PCCchip2 or MC2 chip tick timer 1 */
- out_be32(PCCTCNT1, 0);
- out_be32(PCCTCMP1, PCC_TIMER_CYCLES);
- out_8(PCCTOVR1, in_8(PCCTOVR1) | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN);
- out_8(PCCTIC1, PCCTIC1_INT_EN | 6);
if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, IRQF_TIMER, "timer",
NULL))
panic ("Couldn't register timer int");
+ out_be32(PCCTCNT1, 0);
+ out_be32(PCCTCMP1, PCC_TIMER_CYCLES);
+ out_8(PCCTOVR1, PCCTOVR1_OVR_CLR | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN);
+ out_8(PCCTIC1, PCCTIC1_INT_EN | PCCTIC1_INT_CLR | PCCTIC1_INT_LEVEL);
+
clocksource_register_hz(&mvme16x_clk, PCC_TIMER_CLOCK_FREQ);
if (brdno == 0x0162 || brdno == 0x172)
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
index fc881b46d911..bc6110fb98e0 100644
--- a/arch/mips/crypto/poly1305-glue.c
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -17,7 +17,7 @@ asmlinkage void poly1305_init_mips(void *state, const u8 *key);
asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_mips(&dctx->h, key);
dctx->s[0] = get_unaligned_le32(key + 16);
diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c
index cf33dd8a487e..c25a2ce5e29f 100644
--- a/arch/mips/netlogic/common/irq.c
+++ b/arch/mips/netlogic/common/irq.c
@@ -276,10 +276,6 @@ asmlinkage void plat_irq_dispatch(void)
}
#ifdef CONFIG_CPU_XLP
-static const struct irq_domain_ops xlp_pic_irq_domain_ops = {
- .xlate = irq_domain_xlate_onetwocell,
-};
-
static int __init xlp_of_pic_init(struct device_node *node,
struct device_node *parent)
{
@@ -324,7 +320,7 @@ static int __init xlp_of_pic_init(struct device_node *node,
xlp_pic_domain = irq_domain_add_legacy(node, n_picirqs,
nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE,
- &xlp_pic_irq_domain_ops, NULL);
+ &irq_domain_simple_ops, NULL);
if (xlp_pic_domain == NULL) {
pr_err("PIC %pOFn: Creating legacy domain failed!\n", node);
return -EINVAL;
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
index b1e577cbf00c..88e8ea73bfa7 100644
--- a/arch/powerpc/crypto/sha1-spe-glue.c
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -107,7 +107,7 @@ static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
src += bytes;
len -= bytes;
- };
+ }
memcpy((char *)sctx->buffer, src, len);
return 0;
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 7f1266c24f6b..101477b3e263 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -24,12 +24,6 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
}
}
-/*
- * This function returns an error if it detects any unreliable features of the
- * stack. Otherwise it guarantees that the stack trace is reliable.
- *
- * If the task is not 'current', the caller *must* ensure the task is inactive.
- */
int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
void *cookie, struct task_struct *task)
{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2792879d398e..f3db131be563 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -165,6 +165,7 @@ config X86
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if X86_64
+ select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_ASM_MODVERSIONS
select HAVE_CMPXCHG_DOUBLE
@@ -571,6 +572,7 @@ config X86_UV
depends on X86_EXTENDED_PLATFORM
depends on NUMA
depends on EFI
+ depends on KEXEC_CORE
depends on X86_X2APIC
depends on PCI
help
@@ -777,6 +779,7 @@ if HYPERVISOR_GUEST
config PARAVIRT
bool "Enable paravirtualization code"
+ depends on HAVE_STATIC_CALL
help
This changes the kernel so it can modify itself when it is run
under a hypervisor, potentially improving performance significantly
@@ -1406,7 +1409,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
- depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
+ depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
select X86_PAE
help
Select this if you have a 32-bit processor and more than 4
@@ -1518,6 +1521,7 @@ config AMD_MEM_ENCRYPT
select ARCH_USE_MEMREMAP_PROT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select INSTRUCTION_DECODER
+ select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
help
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
@@ -1931,6 +1935,7 @@ config X86_SGX
depends on CRYPTO_SHA256=y
select SRCU
select MMU_NOTIFIER
+ select NUMA_KEEP_MEMINFO if NUMA
help
Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
that can be used by applications to set aside private regions of code
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9a85eae37b17..78faf9c7e3ae 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -33,6 +33,7 @@ REALMODE_CFLAGS += -ffreestanding
REALMODE_CFLAGS += -fno-stack-protector
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
+REALMODE_CFLAGS += $(CLANG_FLAGS)
export REALMODE_CFLAGS
# BITS is used as extension for files which are available in a 32 bit
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e0bc3988c3fa..6e5522aebbbd 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -46,6 +46,7 @@ KBUILD_CFLAGS += -D__DISABLE_EXPORTS
# Disable relocation relaxation in case the link is not PIE.
KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
+KBUILD_CFLAGS += $(CLANG_FLAGS)
# sev-es.c indirectly inludes inat-table.h which is generated during
# compilation and stored in $(objtree). Add the directory to the includes so
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index c4bb0f9363f5..95a223b3e56a 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -5,7 +5,7 @@
* Early support for invoking 32-bit EFI services from a 64-bit kernel.
*
* Because this thunking occurs before ExitBootServices() we have to
- * restore the firmware's 32-bit GDT before we make EFI serivce calls,
+ * restore the firmware's 32-bit GDT before we make EFI service calls,
* since the firmware's 32-bit IDT is still currently installed and it
* needs to be able to service interrupts.
*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index e94874f4bbc1..a2347ded77ea 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -34,6 +34,7 @@
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
+#include <asm/trapnr.h>
#include "pgtable.h"
/*
@@ -107,9 +108,19 @@ SYM_FUNC_START(startup_32)
movl %eax, %gs
movl %eax, %ss
-/* setup a stack and make sure cpu supports long mode. */
+ /* Set up a stack and load CS from the current GDT */
leal rva(boot_stack_end)(%ebp), %esp
+ pushl $__KERNEL32_CS
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl
+1:
+
+ /* Set up exception handling for SEV-ES */
+ call startup32_load_idt
+
+ /* Make sure cpu supports long mode. */
call verify_cpu
testl %eax, %eax
jnz .Lno_longmode
@@ -172,11 +183,21 @@ SYM_FUNC_START(startup_32)
*/
call get_sev_encryption_bit
xorl %edx, %edx
+#ifdef CONFIG_AMD_MEM_ENCRYPT
testl %eax, %eax
jz 1f
subl $32, %eax /* Encryption bit is always above bit 31 */
bts %eax, %edx /* Set encryption mask for page tables */
+ /*
+ * Mark SEV as active in sev_status so that startup32_check_sev_cbit()
+ * will do a check. The sev_status memory will be fully initialized
+ * with the contents of MSR_AMD_SEV_STATUS later in
+ * set_sev_encryption_mask(). For now it is sufficient to know that SEV
+ * is active.
+ */
+ movl $1, rva(sev_status)(%ebp)
1:
+#endif
/* Initialize Page tables to 0 */
leal rva(pgtable)(%ebx), %edi
@@ -231,7 +252,7 @@ SYM_FUNC_START(startup_32)
/*
* Setup for the jump to 64bit mode
*
- * When the jump is performend we will be in long mode but
+ * When the jump is performed we will be in long mode but
* in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
* (and in turn EFER.LMA = 1). To jump into 64bit mode we use
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
@@ -261,6 +282,9 @@ SYM_FUNC_START(startup_32)
movl %esi, %edx
1:
#endif
+ /* Check if the C-bit position is correct when SEV is active */
+ call startup32_check_sev_cbit
+
pushl $__KERNEL_CS
pushl %eax
@@ -694,6 +718,19 @@ SYM_DATA_START(boot_idt)
.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+SYM_DATA_START(boot32_idt_desc)
+ .word boot32_idt_end - boot32_idt - 1
+ .long 0
+SYM_DATA_END(boot32_idt_desc)
+ .balign 8
+SYM_DATA_START(boot32_idt)
+ .rept 32
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
+#endif
+
#ifdef CONFIG_EFI_STUB
SYM_DATA(image_offset, .long 0)
#endif
@@ -786,6 +823,137 @@ SYM_DATA_START_LOCAL(loaded_image_proto)
SYM_DATA_END(loaded_image_proto)
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ __HEAD
+ .code32
+/*
+ * Write an IDT entry into boot32_idt
+ *
+ * Parameters:
+ *
+ * %eax: Handler address
+ * %edx: Vector number
+ *
+ * Physical offset is expected in %ebp
+ *
+ * %ebx and %ecx are saved and restored, %eax is only read. %edx is used as
+ * scratch and does not hold the vector number on return.
+ */
+SYM_FUNC_START(startup32_set_idt_entry)
+ push %ebx
+ push %ecx
+
+ /* IDT entry address to %ebx */
+ leal rva(boot32_idt)(%ebp), %ebx
+ /* Each entry is 8 bytes, so the byte offset is vector * 8 */
+ shl $3, %edx
+ addl %edx, %ebx
+
+ /* Build IDT entry, lower 4 bytes */
+ movl %eax, %edx
+ andl $0x0000ffff, %edx # Target code segment offset [15:0]
+ movl $__KERNEL32_CS, %ecx # Target code segment selector
+ shl $16, %ecx
+ orl %ecx, %edx
+
+ /* Store lower 4 bytes to IDT */
+ movl %edx, (%ebx)
+
+ /* Build IDT entry, upper 4 bytes */
+ movl %eax, %edx
+ andl $0xffff0000, %edx # Target code segment offset [31:16]
+ orl $0x00008e00, %edx # Present, Type 32-bit Interrupt Gate
+
+ /* Store upper 4 bytes to IDT */
+ movl %edx, 4(%ebx)
+
+ pop %ecx
+ pop %ebx
+ ret
+SYM_FUNC_END(startup32_set_idt_entry)
+#endif
+
+/*
+ * Install the #VC handler in boot32_idt and load that IDT.
+ *
+ * Physical offset is expected in %ebp. %eax and %edx are used as scratch and
+ * are not preserved. Without CONFIG_AMD_MEM_ENCRYPT this function only
+ * returns.
+ */
+SYM_FUNC_START(startup32_load_idt)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* #VC handler */
+ leal rva(startup32_vc_handler)(%ebp), %eax
+ movl $X86_TRAP_VC, %edx
+ call startup32_set_idt_entry
+
+ /* Load IDT */
+ leal rva(boot32_idt)(%ebp), %eax
+ /* The base address field follows the 2-byte limit in the descriptor */
+ movl %eax, rva(boot32_idt_desc+2)(%ebp)
+ lidt rva(boot32_idt_desc)(%ebp)
+#endif
+ ret
+SYM_FUNC_END(startup32_load_idt)
+
+/*
+ * Check for the correct C-bit position when the startup_32 boot-path is used.
+ *
+ * The check makes use of the fact that all memory is encrypted when paging is
+ * disabled. The function creates 64 bits of random data using the RDRAND
+ * instruction. RDRAND is mandatory for SEV guests, so always available. If the
+ * hypervisor violates that, the kernel will crash right here.
+ *
+ * The 64 bits of random data are stored to a memory location and at the same
+ * time kept in the %eax and %ebx registers. Since encryption is always active
+ * when paging is off the random data will be stored encrypted in main memory.
+ *
+ * Then paging is enabled. When the C-bit position is correct all memory is
+ * still mapped encrypted and comparing the register values with memory will
+ * succeed. An incorrect C-bit position will map all memory unencrypted, so that
+ * the compare will use the encrypted random data and fail.
+ *
+ * Physical offset is expected in %ebp. %eax, %ebx, %ecx and %edx are saved on
+ * entry and restored on return. The function returns normally when sev_status
+ * is zero or the check passes; on a failed check it never returns. Without
+ * CONFIG_AMD_MEM_ENCRYPT it only returns.
+ */
+SYM_FUNC_START(startup32_check_sev_cbit)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+
+ /* Check for non-zero sev_status */
+ movl rva(sev_status)(%ebp), %eax
+ testl %eax, %eax
+ /* Zero means nothing to verify: skip straight to the register restore */
+ jz 4f
+
+ /*
+ * Get two 32-bit random values - Don't bail out if RDRAND fails
+ * because it is better to prevent forward progress if no random value
+ * can be gathered.
+ */
+1: rdrand %eax
+ jnc 1b
+2: rdrand %ebx
+ jnc 2b
+
+ /* Store to memory and keep it in the registers */
+ movl %eax, rva(sev_check_data)(%ebp)
+ movl %ebx, rva(sev_check_data+4)(%ebp)
+
+ /* Enable paging to see if encryption is active */
+ movl %cr0, %edx /* Backup %cr0 in %edx */
+ movl $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
+ movl %ecx, %cr0
+
+ /* Both halves read back through the page tables must match the registers */
+ cmpl %eax, rva(sev_check_data)(%ebp)
+ jne 3f
+ cmpl %ebx, rva(sev_check_data+4)(%ebp)
+ jne 3f
+
+ movl %edx, %cr0 /* Restore previous %cr0 */
+
+ jmp 4f
+
+3: /* Check failed - hlt the machine */
+ hlt
+ /* Halt again if execution ever continues past the hlt */
+ jmp 3b
+
+4:
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+#endif
+ ret
+SYM_FUNC_END(startup32_check_sev_cbit)
+
/*
* Stack and heap for uncompression
*/
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
index 804a502ee0d2..9b93567d663a 100644
--- a/arch/x86/boot/compressed/idt_64.c
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -52,3 +52,17 @@ void load_stage2_idt(void)
load_boot_idt(&boot_idt_desc);
}
+
+/*
+ * Tear down the decompressor's exception handling: shut down the GHCB first,
+ * then load an empty IDT. The GHCB shutdown runs while the boot IDT is still
+ * loaded.
+ */
+void cleanup_exception_handling(void)
+{
+ /*
+ * Flush GHCB from cache and map it encrypted again when running as
+ * SEV-ES guest.
+ */
+ sev_es_shutdown_ghcb();
+
+ /* Set a null-idt, disabling #PF and #VC handling */
+ boot_idt_desc.size = 0;
+ boot_idt_desc.address = 0;
+ load_boot_idt(&boot_idt_desc);
+}
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index b92fffbe761f..e36690778497 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -639,9 +639,9 @@ static bool process_mem_region(struct mem_vector *region,
if (slot_area_index == MAX_SLOT_AREA) {
debug_putstr("Aborted e820/efi memmap scan (slot_areas full)!\n");
- return 1;
+ return true;
}
- return 0;
+ return false;
}
#if defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index aa561795efd1..c1e81a848b2a 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -23,12 +23,6 @@ SYM_FUNC_START(get_sev_encryption_bit)
push %ecx
push %edx
- /* Check if running under a hypervisor */
- movl $1, %eax
- cpuid
- bt $31, %ecx /* Check the hypervisor bit */
- jnc .Lno_sev
-
movl $0x80000000, %eax /* CPUID to check the highest leaf */
cpuid
cmpl $0x8000001f, %eax /* See if 0x8000001f is available */
@@ -67,10 +61,132 @@ SYM_FUNC_START(get_sev_encryption_bit)
ret
SYM_FUNC_END(get_sev_encryption_bit)
+/**
+ * sev_es_req_cpuid - Request a CPUID value from the Hypervisor using
+ * the GHCB MSR protocol
+ *
+ * @%eax: Register to request (0=EAX, 1=EBX, 2=ECX, 3=EDX)
+ * @%edx: CPUID Function
+ *
+ * Returns 0 in %eax on success, non-zero on failure
+ * %edx returns CPUID value on success
+ *
+ * On failure %eax is set to -1. %ecx is used as scratch and not preserved.
+ */
+SYM_CODE_START_LOCAL(sev_es_req_cpuid)
+ /* Request word: register index in bits [31:30], request code 0x004 below */
+ shll $30, %eax
+ orl $0x00000004, %eax
+ /* wrmsr writes %edx:%eax, so the CPUID function goes out in the upper half */
+ movl $MSR_AMD64_SEV_ES_GHCB, %ecx
+ wrmsr
+ rep; vmmcall # VMGEXIT
+ /* Read the response back into %edx:%eax */
+ rdmsr
+
+ /* Check response */
+ movl %eax, %ecx
+ andl $0x3ffff000, %ecx # Bits [12-29] MBZ
+ jnz 2f
+
+ /* Check return code */
+ andl $0xfff, %eax
+ /* The low 12 bits of the response must hold code 0x005 */
+ cmpl $5, %eax
+ jne 2f
+
+ /* All good - return success */
+ xorl %eax, %eax
+1:
+ ret
+2:
+ /* Failure: report -1 through the common return above */
+ movl $-1, %eax
+ jmp 1b
+SYM_CODE_END(sev_es_req_cpuid)
+
+/*
+ * #VC exception handler for the 32-bit boot path. Only error code 0x72
+ * (SVM_EXIT_CPUID) is handled; any other error code, a failed request or a
+ * failed sanity check ends in a termination request to the hypervisor.
+ *
+ * The CPUID function is taken from %eax as it was when the exception hit.
+ * The value of %ecx at that point is not read by this handler.
+ *
+ * Stack layout after the four pushes below:
+ *
+ *   0(%esp)  saved %edx
+ *   4(%esp)  saved %ecx
+ *   8(%esp)  saved %ebx
+ *  12(%esp)  saved %eax
+ *  16(%esp)  error code
+ *  20(%esp)  return address used by iret
+ *
+ * Each CPUID result is written into the matching saved-register slot, so the
+ * pops at .Ldone deliver the results in %eax, %ebx, %ecx and %edx.
+ */
+SYM_CODE_START(startup32_vc_handler)
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+
+ /* Keep CPUID function in %ebx */
+ movl %eax, %ebx
+
+ /* Check if error-code == SVM_EXIT_CPUID */
+ cmpl $0x72, 16(%esp)
+ jne .Lfail
+
+ movl $0, %eax # Request CPUID[fn].EAX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 12(%esp) # Store result
+
+ movl $1, %eax # Request CPUID[fn].EBX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 8(%esp) # Store result
+
+ movl $2, %eax # Request CPUID[fn].ECX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 4(%esp) # Store result
+
+ movl $3, %eax # Request CPUID[fn].EDX
+ movl %ebx, %edx # CPUID fn
+ call sev_es_req_cpuid # Call helper
+ testl %eax, %eax # Check return code
+ jnz .Lfail
+ movl %edx, 0(%esp) # Store result
+
+ /*
+ * Sanity check CPUID results from the Hypervisor. See comment in
+ * do_vc_no_ghcb() for more details on why this is necessary.
+ */
+
+ /* Fail if SEV leaf not available in CPUID[0x80000000].EAX */
+ cmpl $0x80000000, %ebx
+ jne .Lcheck_sev
+ /* Unsigned compare of the returned EAX against 0x8000001f */
+ cmpl $0x8000001f, 12(%esp)
+ jb .Lfail
+ jmp .Ldone
+
+.Lcheck_sev:
+ /* Fail if SEV bit not set in CPUID[0x8000001f].EAX[1] */
+ cmpl $0x8000001f, %ebx
+ jne .Ldone
+ btl $1, 12(%esp)
+ jnc .Lfail
+
+.Ldone:
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ /* Remove error code */
+ addl $4, %esp
+
+ /* Jump over CPUID instruction */
+ addl $2, (%esp)
+
+ iret
+.Lfail:
+ /* Send terminate request to Hypervisor */
+ movl $0x100, %eax
+ xorl %edx, %edx
+ movl $MSR_AMD64_SEV_ES_GHCB, %ecx
+ wrmsr
+ rep; vmmcall
+
+ /* If request fails, go to hlt loop */
+ hlt
+ /* The loop re-enters .Lfail, so the terminate request is sent again */
+ jmp .Lfail
+SYM_CODE_END(startup32_vc_handler)
+
.code64
#include "../../kernel/sev_verify_cbit.S"
-
SYM_FUNC_START(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT
push %rbp
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 267e7f93050e..dde042f64cca 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -430,8 +430,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
error("Destination address too large");
#endif
#ifndef CONFIG_RELOCATABLE
- if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
- error("Destination address does not match LOAD_PHYSICAL_ADDR");
if (virt_addr != LOAD_PHYSICAL_ADDR)
error("Destination virtual address changed when not relocatable");
#endif
@@ -443,11 +441,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
handle_relocations(output, output_len, virt_addr);
debug_putstr("done.\nBooting the kernel.\n");
- /*
- * Flush GHCB from cache and map it encrypted again when running as
- * SEV-ES guest.
- */
- sev_es_shutdown_ghcb();
+ /* Disable exception handling before booting the kernel */
+ cleanup_exception_handling();
return output;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 901ea5ebec22..e5612f035498 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -155,6 +155,12 @@ extern pteval_t __default_kernel_pte_mask;
extern gate_desc boot_idt[BOOT_IDT_ENTRIES];
extern struct desc_ptr boot_idt_desc;
+#ifdef CONFIG_X86_64
+void cleanup_exception_handling(void);
+#else
+static inline void cleanup_exception_handling(void) { }
+#endif
+
/* IDT Entry Points */
void boot_page_fault(void);
void boot_stage1_vc(void);
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
index 27826c265aab..d904bd56b3e3 100644
--- a/arch/x86/boot/compressed/sev-es.c
+++ b/arch/x86/boot/compressed/sev-es.c
@@ -200,14 +200,8 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
}
finish:
- if (result == ES_OK) {
+ if (result == ES_OK)
vc_finish_insn(&ctxt);
- } else if (result != ES_RETRY) {
- /*
- * For now, just halt the machine. That makes debugging easier,
- * later we just call sev_es_terminate() here.
- */
- while (true)
- asm volatile("hlt\n");
- }
+ else if (result != ES_RETRY)
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
}
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 7c4c7b2fbf05..98cf3b4e4c9f 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -24,7 +24,7 @@
/*
* Copyright 2012 Xyratex Technology Limited
*
- * Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation.
+ * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation.
*/
#include <linux/init.h>
#include <linux/module.h>
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index 5af8021b98ce..6706b6cb1d0f 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -114,11 +114,11 @@ static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
);
}
-/* Computes the field substraction of two field elements */
+/* Computes the field subtraction of two field elements */
static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
{
asm volatile(
- /* Compute the raw substraction of f1-f2 */
+ /* Compute the raw subtraction of f1-f2 */
" movq 0(%1), %%r8;"
" subq 0(%2), %%r8;"
" movq 8(%1), %%r9;"
@@ -135,7 +135,7 @@ static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
" mov $38, %%rcx;"
" cmovc %%rcx, %%rax;"
- /* Step 2: Substract carry*38 from the original difference */
+ /* Step 2: Subtract carry*38 from the original difference */
" sub %%rax, %%r8;"
" sbb $0, %%r9;"
" sbb $0, %%r10;"
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 646da46e8d10..1dfb8af48a3c 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -16,7 +16,7 @@
#include <asm/simd.h>
asmlinkage void poly1305_init_x86_64(void *ctx,
- const u8 key[POLY1305_KEY_SIZE]);
+ const u8 key[POLY1305_BLOCK_SIZE]);
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
const size_t len, const u32 padbit);
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
@@ -81,7 +81,7 @@ static void convert_to_base2_64(void *ctx)
state->is_base2_26 = 0;
}
-static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE])
+static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
{
poly1305_init_x86_64(ctx, key);
}
@@ -129,7 +129,7 @@ static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
poly1305_emit_avx(ctx, mac, nonce);
}
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
poly1305_simd_init(&dctx->h, key);
dctx->s[0] = get_unaligned_le32(&key[16]);
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index fc23552afe37..bca4cea757ce 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -88,7 +88,7 @@
/*
* Combined G1 & G2 function. Reordered with help of rotates to have moves
- * at begining.
+ * at beginning.
*/
#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
/* G1,1 && G2,1 */ \
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 03725696397c..3507cf2064f1 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -117,7 +117,7 @@ static bool is_blacklisted_cpu(void)
* storing blocks in 64bit registers to allow three blocks to
* be processed parallel. Parallel operation then allows gaining
* more performance than was trade off, on out-of-order CPUs.
- * However Atom does not benefit from this parallellism and
+ * However Atom does not benefit from this parallelism and
* should be blacklisted.
*/
return true;
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 4efd39aacb9f..7b2542b13ebd 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -38,6 +38,7 @@
#ifdef CONFIG_X86_64
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
+ add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);
instrumentation_begin();
@@ -83,6 +84,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
unsigned int nr = syscall_32_enter(regs);
+ add_random_kstack_offset();
/*
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
* orig_ax, the unsigned int return value truncates it. This may
@@ -102,6 +104,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
unsigned int nr = syscall_32_enter(regs);
int res;
+ add_random_kstack_offset();
/*
* This cannot use syscall_enter_from_user_mode() as it has to
* fetch EBP before invoking any of the syscall entry work
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017e6161..ff0034740900 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -40,7 +40,7 @@
#include <asm/processor-flags.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>
@@ -209,7 +209,7 @@
*
* Lets build a 5 entry IRET frame after that, such that struct pt_regs
* is complete and in particular regs->sp is correct. This gives us
- * the original 6 enties as gap:
+ * the original 6 entries as gap:
*
* 14*4(%esp) - <previous context>
* 13*4(%esp) - gap / flags
@@ -430,7 +430,7 @@
* will soon execute iret and the tracer was already set to
* the irqstate after the IRET:
*/
- DISABLE_INTERRUPTS(CLBR_ANY)
+ cli
lss (%esp), %esp /* switch to espfix segment */
.Lend_\@:
#endif /* CONFIG_X86_ESPFIX32 */
@@ -1077,7 +1077,7 @@ restore_all_switch_stack:
* when returning from IPI handler and when returning from
* scheduler to user-space.
*/
- INTERRUPT_RETURN
+ iret
.section .fixup, "ax"
SYM_CODE_START(asm_iret_error)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 400908dff42e..a16a5294d55f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -305,7 +305,7 @@ SYM_CODE_END(ret_from_fork)
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
pushq %rax
- SAVE_FLAGS(CLBR_RAX)
+ SAVE_FLAGS
testl $X86_EFLAGS_IF, %eax
jz .Lokay_\@
ud2
@@ -511,7 +511,7 @@ SYM_CODE_START(\asmsym)
/*
* No need to switch back to the IST stack. The current stack is either
* identical to the stack in the IRET frame or the VC fall-back stack,
- * so it is definitly mapped even with PTI enabled.
+ * so it is definitely mapped even with PTI enabled.
*/
jmp paranoid_exit
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 2d0f3d8bcc25..edfe9780f6d1 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -218,7 +218,7 @@ int main(int argc, char **argv)
/*
* Figure out the struct name. If we're writing to a .so file,
- * generate raw output insted.
+ * generate raw output instead.
*/
name = strdup(argv[3]);
namelen = strlen(name);
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 1c7cfac7e64a..5264daa8859f 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -35,7 +35,7 @@ static void BITSFUNC(extract)(const unsigned char *data, size_t data_len,
if (offset + len > data_len)
fail("section to extract overruns input data");
- fprintf(outfile, "static const unsigned char %s[%lu] = {", name, len);
+ fprintf(outfile, "static const unsigned char %s[%zu] = {", name, len);
BITSFUNC(copy)(outfile, data + offset, len);
fprintf(outfile, "\n};\n\n");
}
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index de1fff7188aa..6ddd7a937b3e 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -6,7 +6,7 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
.text
.globl __kernel_vsyscall
@@ -29,7 +29,7 @@ __kernel_vsyscall:
* anyone with an AMD CPU, for example). Nonetheless, we try to keep
* it working approximately as well as it ever worked.
*
- * This link may eludicate some of the history:
+ * This link may elucidate some of the history:
* https://android-review.googlesource.com/#/q/Iac3295376d61ef83e713ac9b528f3b50aa780cd7
* personally, I find it hard to understand what's going on there.
*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 825e829ffff1..235a5794296a 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -358,7 +358,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
mmap_write_lock(mm);
/*
* Check if we have already mapped vdso blob - fail to prevent
- * abusing from userspace install_speciall_mapping, which may
+ * abusing from userspace install_special_mapping, which may
* not do accounting and rlimit right.
* We could search vma near context.vdso, but it's a slowpath,
* so let's explicitly check all VMAs to be completely sure.
diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S
index 86a0e94f68df..99dafac992e2 100644
--- a/arch/x86/entry/vdso/vsgx.S
+++ b/arch/x86/entry/vdso/vsgx.S
@@ -137,7 +137,7 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave)
/*
* If the return from callback is zero or negative, return immediately,
- * else re-execute ENCLU with the postive return value interpreted as
+ * else re-execute ENCLU with the positive return value interpreted as
* the requested ENCLU function.
*/
cmp $0, %eax
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 2c1791c4a518..9687a8aef01c 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -623,7 +623,7 @@ static void amd_pmu_disable_all(void)
/*
* Check each counter for overflow and wait for it to be reset by the
* NMI if it has overflowed. This relies on the fact that all active
- * counters are always enabled when this function is caled and
+ * counters are always enabled when this function is called and
* ARCH_PERFMON_EVENTSEL_INT is always set.
*/
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
index 0e5c036fd7be..e6493a67f1c6 100644
--- a/arch/x86/events/amd/iommu.h
+++ b/arch/x86/events/amd/iommu.h
@@ -17,7 +17,7 @@
#define IOMMU_PC_DEVID_MATCH_REG 0x20
#define IOMMU_PC_COUNTER_REPORT_REG 0x28
-/* maximun specified bank/counters */
+/* maximum specified bank/counters */
#define PC_MAX_SPEC_BNKS 64
#define PC_MAX_SPEC_CNTRS 16
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 18df17129695..4c31cae4707e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -765,7 +765,7 @@ struct perf_sched {
};
/*
- * Initialize interator that runs through all events and counters.
+ * Initialize iterator that runs through all events and counters.
*/
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
int num, int wmin, int wmax, int gpmax)
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 731dd8d0dbb1..6320d2cfd9d3 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -594,7 +594,7 @@ static __init int bts_init(void)
* we cannot use the user mapping since it will not be available
* if we're not running the owning process.
*
- * With PTI we can't use the kernal map either, because its not
+ * With PTI we can't use the kernel map either, because it's not
* there when we run userspace.
*
* For now, disable this driver when using PTI.
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 37ce38403cb8..3fd69bd5fa6e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -137,7 +137,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
- INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
+ INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
@@ -2186,7 +2186,7 @@ static void intel_pmu_enable_all(int added)
* magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
* in sequence on the same PMC or on different PMCs.
*
- * In practise it appears some of these events do in fact count, and
+ * In practice it appears some of these events do in fact count, and
* we need to program all 4 events.
*/
static void intel_pmu_nhm_workaround(void)
@@ -2435,7 +2435,7 @@ static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
/*
* The metric is reported as an 8bit integer fraction
- * suming up to 0xff.
+ * summing up to 0xff.
* slots-in-metric = (Metric / 0xff) * slots
*/
val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff;
@@ -2776,7 +2776,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
* processing loop coming after that the function, otherwise
* phony regular samples may be generated in the sampling buffer
* not marked with the EXACT tag. Another possibility is to have
- * one PEBS event and at least one non-PEBS event whic hoverflows
+ * one PEBS event and at least one non-PEBS event which overflows
* while PEBS has armed. In this case, bit 62 of GLOBAL_STATUS will
* not be set, yet the overflow status bit for the PEBS counter will
* be on Skylake.
@@ -2824,7 +2824,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
}
/*
- * Intel Perf mertrics
+ * Intel Perf metrics
*/
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
handled++;
@@ -4516,7 +4516,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000),
@@ -4594,7 +4594,7 @@ static bool check_msr(unsigned long msr, u64 mask)
/*
* Disable the check for real HW, so we don't
- * mess with potentionaly enabled registers:
+ * mess with potentially enabled registers:
*/
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return true;
@@ -4659,7 +4659,7 @@ static __init void intel_arch_events_quirk(void)
{
int bit;
- /* disable event that reported as not presend by cpuid */
+ /* disable event that reported as not present by cpuid */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
pr_warn("CPUID marked event: \'%s\' unavailable\n",
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index d32b302719fe..5aabb0e2964a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1805,7 +1805,7 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
*
* [-period, 0]
*
- * the difference between two consequtive reads is:
+ * the difference between two consecutive reads is:
*
* A) value2 - value1;
* when no overflows have happened in between,
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 21890dacfcfe..acb04ef3da3f 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1198,7 +1198,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
/*
* The LBR logs any address in the IP, even if the IP just
* faulted. This means userspace can control the from address.
- * Ensure we don't blindy read any address by validating it is
+ * Ensure we don't blindly read any address by validating it is
* a known text address.
*/
if (kernel_text_address(from)) {
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index a4cc66005ce8..971dffe0b77d 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -24,7 +24,7 @@ struct p4_event_bind {
unsigned int escr_msr[2]; /* ESCR MSR for this event */
unsigned int escr_emask; /* valid ESCR EventMask bits */
unsigned int shared; /* event is shared across threads */
- char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
+ char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
};
struct p4_pebs_bind {
@@ -45,7 +45,7 @@ struct p4_pebs_bind {
* it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
* event configuration to find out which values are to be
* written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
- * resgisters
+ * registers
*/
static struct p4_pebs_bind p4_pebs_bind_map[] = {
P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
@@ -1313,7 +1313,7 @@ static __initconst const struct x86_pmu p4_pmu = {
.get_event_constraints = x86_get_event_constraints,
/*
* IF HT disabled we may need to use all
- * ARCH_P4_MAX_CCCR counters simulaneously
+ * ARCH_P4_MAX_CCCR counters simultaneously
* though leave it restricted at moment assuming
* HT is on
*/
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index e94af4a54d0d..915847655c06 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -362,7 +362,7 @@ static bool pt_event_valid(struct perf_event *event)
/*
* Setting bit 0 (TraceEn in RTIT_CTL MSR) in the attr.config
- * clears the assomption that BranchEn must always be enabled,
+ * clears the assumption that BranchEn must always be enabled,
* as was the case with the first implementation of PT.
* If this bit is not set, the legacy behavior is preserved
* for compatibility with the older userspace.
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b79951d0707c..4bba0491068c 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -280,17 +280,17 @@
* | [63] | 00h | VALID - When set, indicates the CPU bus
* numbers have been initialized. (RO)
* |[62:48]| --- | Reserved
- * |[47:40]| 00h | BUS_NUM_5 — Return the bus number BIOS assigned
+ * |[47:40]| 00h | BUS_NUM_5 - Return the bus number BIOS assigned
* CPUBUSNO(5). (RO)
- * |[39:32]| 00h | BUS_NUM_4 — Return the bus number BIOS assigned
+ * |[39:32]| 00h | BUS_NUM_4 - Return the bus number BIOS assigned
* CPUBUSNO(4). (RO)
- * |[31:24]| 00h | BUS_NUM_3 — Return the bus number BIOS assigned
+ * |[31:24]| 00h | BUS_NUM_3 - Return the bus number BIOS assigned
* CPUBUSNO(3). (RO)
- * |[23:16]| 00h | BUS_NUM_2 — Return the bus number BIOS assigned
+ * |[23:16]| 00h | BUS_NUM_2 - Return the bus number BIOS assigned
* CPUBUSNO(2). (RO)
- * |[15:8] | 00h | BUS_NUM_1 — Return the bus number BIOS assigned
+ * |[15:8] | 00h | BUS_NUM_1 - Return the bus number BIOS assigned
* CPUBUSNO(1). (RO)
- * | [7:0] | 00h | BUS_NUM_0 — Return the bus number BIOS assigned
+ * | [7:0] | 00h | BUS_NUM_0 - Return the bus number BIOS assigned
* CPUBUSNO(0). (RO)
*/
#define SKX_MSR_CPU_BUS_NUMBER 0x300
@@ -1159,7 +1159,6 @@ enum {
SNBEP_PCI_QPI_PORT0_FILTER,
SNBEP_PCI_QPI_PORT1_FILTER,
BDX_PCI_QPI_PORT2_FILTER,
- HSWEP_PCI_PCU_3,
};
static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
@@ -2857,22 +2856,33 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
NULL,
};
-void hswep_uncore_cpu_init(void)
+#define HSWEP_PCU_DID 0x2fc0
+#define HSWEP_PCU_CAPID4_OFFET 0x94
+#define hswep_get_chop(_cap) (((_cap) >> 6) & 0x3)
+
+/*
+ * Report whether CAPID4 of the given Intel PCI device has a zero chop field.
+ *
+ * @device: PCI device ID to look up under PCI_VENDOR_ID_INTEL.
+ *
+ * Returns true when the device is found and the chop field of its CAPID4
+ * register (bits [7:6], see hswep_get_chop()) is zero. Returns false when
+ * the device is not present or the field is non-zero.
+ */
+static bool hswep_has_limit_sbox(unsigned int device)
{
- int pkg = boot_cpu_data.logical_proc_id;
+ struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, NULL);
+ u32 capid4;
+
+ if (!dev)
+ return false;
+
+ pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4);
+
+ /*
+ * pci_get_device() returned the device with its reference count raised;
+ * drop that reference now that the register has been read, otherwise
+ * every call leaks one reference.
+ */
+ pci_dev_put(dev);
+
+ return !hswep_get_chop(capid4);
+}
+
+void hswep_uncore_cpu_init(void)
+{
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
/* Detect 6-8 core systems with only two SBOXes */
- if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
- u32 capid4;
-
- pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
- 0x94, &capid4);
- if (((capid4 >> 6) & 0x3) == 0)
- hswep_uncore_sbox.num_boxes = 2;
- }
+ if (hswep_has_limit_sbox(HSWEP_PCU_DID))
+ hswep_uncore_sbox.num_boxes = 2;
uncore_msr_uncores = hswep_msr_uncores;
}
@@ -3135,11 +3145,6 @@ static const struct pci_device_id hswep_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
SNBEP_PCI_QPI_PORT1_FILTER),
},
- { /* PCU.3 (for Capability registers) */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- HSWEP_PCI_PCU_3),
- },
{ /* end: all zeroes */ }
};
@@ -3231,27 +3236,18 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
EVENT_CONSTRAINT_END
};
+#define BDX_PCU_DID 0x6fc0
+
void bdx_uncore_cpu_init(void)
{
- int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id);
-
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
- /* BDX-DE doesn't have SBOX */
- if (boot_cpu_data.x86_model == 86) {
- uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
/* Detect systems with no SBOXes */
- } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
- struct pci_dev *pdev;
- u32 capid4;
-
- pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3];
- pci_read_config_dword(pdev, 0x94, &capid4);
- if (((capid4 >> 6) & 0x3) == 0)
- bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
- }
+ if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID))
+ uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+
hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
}
@@ -3472,11 +3468,6 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
BDX_PCI_QPI_PORT2_FILTER),
},
- { /* PCU.3 (for Capability registers) */
- PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
- HSWEP_PCI_PCU_3),
- },
{ /* end: all zeroes */ }
};
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index e68827e604ad..949d845c922b 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -494,7 +494,7 @@ static __init void zhaoxin_arch_events_quirk(void)
{
int bit;
- /* disable event that reported as not presend by cpuid */
+ /* disable event that reported as not present by cpuid */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
pr_warn("CPUID marked event: \'%s\' unavailable\n",
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index b81047dec1da..e7b94f636cc1 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
static inline bool hv_reenlightenment_available(void)
{
/*
- * Check for required features and priviliges to make TSC frequency
+ * Check for required features and privileges to make TSC frequency
* change notifications work.
*/
return ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
@@ -292,7 +292,7 @@ static int hv_suspend(void)
/*
* Reset the hypercall page as it is going to be invalidated
- * accross hibernation. Setting hv_hypercall_pg to NULL ensures
+ * across hibernation. Setting hv_hypercall_pg to NULL ensures
* that any subsequent hypercall operation fails safely instead of
* crashing due to an access of an invalid page. The hypercall page
* pointer is restored on resume.
diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h
index 62da760d6d5a..cd7b14322035 100644
--- a/arch/x86/include/asm/agp.h
+++ b/arch/x86/include/asm/agp.h
@@ -9,7 +9,7 @@
* Functions to keep the agpgart mappings coherent with the MMU. The
* GART gives the CPU a physical alias of pages in memory. The alias
* region is mapped uncacheable. Make sure there are no conflicting
- * mappings with different cachability attributes for the same
+ * mappings with different cacheability attributes for the same
* page. This avoids data corruption on some CPUs.
*/
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
deleted file mode 100644
index 464034db299f..000000000000
--- a/arch/x86/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
-#define _ASM_X86_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-#include <asm/asm.h>
-
-#ifdef CONFIG_SMP
- .macro LOCK_PREFIX
-672: lock
- .pushsection .smp_locks,"a"
- .balign 4
- .long 672b - .
- .popsection
- .endm
-#else
- .macro LOCK_PREFIX
- .endm
-#endif
-
-/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.ignore_alts
- .long .Lannotate_\@ - .
- .popsection
-.endm
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
- .long \orig - .
- .long \alt - .
- .word \feature
- .byte \orig_len
- .byte \alt_len
- .byte \pad_len
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
-.endm
-
-#define old_len 141b-140b
-#define new_len1 144f-143f
-#define new_len2 145f-144f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-
-
-/*
- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
- * @feature2, it replaces @oldinstr with @feature2.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-140:
- \oldinstr
-141:
- .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_short(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13adca37c99a..17b36090d448 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -2,13 +2,17 @@
#ifndef _ASM_X86_ALTERNATIVE_H
#define _ASM_X86_ALTERNATIVE_H
-#ifndef __ASSEMBLY__
-
#include <linux/types.h>
-#include <linux/stddef.h>
#include <linux/stringify.h>
#include <asm/asm.h>
+#define ALTINSTR_FLAG_INV (1 << 15)
+#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stddef.h>
+
/*
* Alternative inline assembly for SMP.
*
@@ -150,7 +154,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
" .byte " alt_rlen(num) "\n" /* replacement len */ \
" .byte " alt_pad_len "\n" /* pad len */
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
"# ALT: replacement " #num "\n" \
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
@@ -161,7 +165,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_ENTRY(feature, 1) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+ ALTINSTR_REPLACEMENT(newinstr, 1) \
".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
@@ -171,10 +175,15 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+ ALTINSTR_REPLACEMENT(newinstr1, 1) \
+ ALTINSTR_REPLACEMENT(newinstr2, 2) \
".popsection\n"
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
+ newinstr_yes, feature)
+
#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
OLDINSTR_3(oldinsn, 1, 2, 3) \
".pushsection .altinstructions,\"a\"\n" \
@@ -183,9 +192,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_ENTRY(feat3, 3) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \
- ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \
- ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \
+ ALTINSTR_REPLACEMENT(newinsn1, 1) \
+ ALTINSTR_REPLACEMENT(newinsn2, 2) \
+ ALTINSTR_REPLACEMENT(newinsn3, 3) \
".popsection\n"
/*
@@ -206,6 +215,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \
+ asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory")
+
/*
* Alternative inline assembly with input.
*
@@ -271,6 +283,116 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_SMP
+ .macro LOCK_PREFIX
+672: lock
+ .pushsection .smp_locks,"a"
+ .balign 4
+ .long 672b - .
+ .popsection
+ .endm
+#else
+ .macro LOCK_PREFIX
+ .endm
+#endif
+
+/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+.macro ANNOTATE_IGNORE_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.ignore_alts
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+ .long \orig - .
+ .long \alt - .
+ .word \feature
+ .byte \orig_len
+ .byte \alt_len
+ .byte \pad_len
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+ \oldinstr
+141:
+ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr
+144:
+ .popsection
+.endm
+
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * gas compatible max based on the idea from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
+ */
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+
+/*
+ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+ * @feature2, it replaces @oldinstr with @newinstr2.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+ \oldinstr
+141:
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr1
+144:
+ \newinstr2
+145:
+ .popsection
+.endm
+
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+ ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
+ newinstr_yes, feature
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 4d4ec5cbdc51..94fbe6ae7431 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -22,7 +22,7 @@ extern void __add_wrong_size(void)
/*
* Constants for operation sizes. On 32-bit, the 64-bit size it set to
* -1 because sizeof will never return -1, thereby making those switch
- * case statements guaranteeed dead code which the compiler will
+ * case statements guaranteed dead code which the compiler will
* eliminate, and allowing the "missing symbol in the default case" to
* indicate a usage error.
*/
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index da78ccbd493b..0d7fc0e2bfc9 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -41,12 +41,13 @@ unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
#ifdef CONFIG_CPU_SUP_INTEL
-extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
+extern void __init sld_setup(struct cpuinfo_x86 *c);
extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
+extern void handle_bus_lock(struct pt_regs *regs);
#else
-static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
+static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
@@ -57,6 +58,8 @@ static inline bool handle_guest_split_lock(unsigned long ip)
{
return false;
}
+
+static inline void handle_bus_lock(struct pt_regs *regs) {}
#endif
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1728d4ce5730..16a51e7288d5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -8,6 +8,7 @@
#include <asm/asm.h>
#include <linux/bitops.h>
+#include <asm/alternative.h>
enum cpuid_leafs
{
@@ -175,39 +176,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P[always]\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P[feature]\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
+ asm_volatile_goto(
+ ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
: : [feature] "i" (bit),
- [always] "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
[cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index cc96e26d69f7..624116562086 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -236,6 +236,8 @@
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -290,6 +292,8 @@
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -354,6 +358,7 @@
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */
#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9224d40cdefe..7d7500806af8 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -283,12 +283,12 @@ extern u32 elf_hwcap2;
*
* The decision process for determining the results are:
*
- *              CPU: | lacks NX*  | has NX, ia32     | has NX, x86_64 |
- * ELF:              |            |                  |                |
+ * CPU: | lacks NX* | has NX, ia32 | has NX, x86_64 |
+ * ELF: | | | |
* ---------------------|------------|------------------|----------------|
- * missing PT_GNU_STACK | exec-all   | exec-all         | exec-none      |
- * PT_GNU_STACK == RWX  | exec-stack | exec-stack       | exec-stack     |
- * PT_GNU_STACK == RW   | exec-none  | exec-none        | exec-none      |
+ * missing PT_GNU_STACK | exec-all | exec-all | exec-none |
+ * PT_GNU_STACK == RWX | exec-stack | exec-stack | exec-stack |
+ * PT_GNU_STACK == RW | exec-none | exec-none | exec-none |
*
* exec-all : all PROT_READ user mappings are executable, except when
* backed by files on a noexec-filesystem.
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 2b87b191b3b8..14ebd2196569 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_ENTRY_COMMON_H
#define _ASM_X86_ENTRY_COMMON_H
+#include <linux/randomize_kstack.h>
#include <linux/user-return-notifier.h>
#include <asm/nospec-branch.h>
@@ -70,6 +71,21 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
*/
current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
#endif
+
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * but not enough for x86 stack utilization comfort. To keep
+ * reasonable stack head room, reduce the maximum offset to 8 bits.
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints (see cc_stack_align4/8 in
+ * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
+ * low bits from any entropy chosen here.
+ *
+ * Therefore, final stack offset entropy will be 5 (x86_64) or
+ * 6 (ia32) bits.
+ */
+ choose_random_kstack_offset(rdtsc() & 0xFF);
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 5eb3bdf36a41..e35e342673c7 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -547,7 +547,7 @@ SYM_CODE_END(spurious_entries_start)
/*
* Dummy trap number so the low level ASM macro vector number checks do not
* match which results in emitting plain IDTENTRY stubs without bells and
- * whistels.
+ * whistles.
*/
#define X86_TRAP_OTHER 0xFFFF
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
index 3cb002b1d0f9..994638ef171b 100644
--- a/arch/x86/include/asm/intel_pconfig.h
+++ b/arch/x86/include/asm/intel_pconfig.h
@@ -38,7 +38,7 @@ enum pconfig_leaf {
#define MKTME_INVALID_ENC_ALG 4
#define MKTME_DEVICE_BUSY 5
-/* Hardware requires the structure to be 256 byte alinged. Otherwise #GP(0). */
+/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */
struct mktme_key_program {
u16 keyid;
u32 keyid_ctrl;
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 423b788f495e..ebe8d2ea44fe 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -3,7 +3,7 @@
#define _ASM_X86_INTEL_PT_H
#define PT_CPUID_LEAVES 2
-#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
+#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */
enum pt_capabilities {
PT_CAP_max_subleaf = 0,
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d726459d08e5..841a5d104afa 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -159,7 +159,7 @@ static inline void *phys_to_virt(phys_addr_t address)
/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
* However, we truncate the address to unsigned int to avoid undesirable
- * promitions in legacy drivers.
+ * promotions in legacy drivers.
*/
static inline unsigned int isa_virt_to_bus(volatile void *address)
{
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 9b2a0ff76c73..562854c60808 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -190,7 +190,7 @@
/*
* Macro to invoke __do_softirq on the irq stack. This is only called from
- * task context when bottom halfs are about to be reenabled and soft
+ * task context when bottom halves are about to be reenabled and soft
* interrupts are pending to be processed. The interrupt stack cannot be in
* use here.
*/
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 144d70ea4393..c5ce9845c999 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -109,18 +109,13 @@ static __always_inline unsigned long arch_local_irq_save(void)
}
#else
-#define ENABLE_INTERRUPTS(x) sti
-#define DISABLE_INTERRUPTS(x) cli
-
#ifdef CONFIG_X86_64
#ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x) pushfq; popq %rax
+#define SAVE_FLAGS pushfq; popq %rax
#endif
#define INTERRUPT_RETURN jmp native_iret
-#else
-#define INTERRUPT_RETURN iret
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3768819693e5..10eca9e8f7f6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1488,7 +1488,7 @@ extern u64 kvm_mce_cap_supported;
/*
* EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
* userspace I/O) to indicate that the emulation context
- * should be resued as is, i.e. skip initialization of
+ * should be reused as is, i.e. skip initialization of
* emulation context, instruction fetch and decode.
*
* EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
@@ -1513,7 +1513,7 @@ extern u64 kvm_mce_cap_supported;
*
* EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
* backdoor emulation, which is opt in via module param.
- * VMware backoor emulation handles select instructions
+ * VMware backdoor emulation handles select instructions
* and reinjects the #GP for all other cases.
*
* EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index ccf60a809a17..e7be720062a8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -63,7 +63,7 @@ typedef int (*hyperv_fill_flush_list_func)(
static __always_inline void hv_setup_sched_clock(void *sched_clock)
{
#ifdef CONFIG_PARAVIRT
- pv_ops.time.sched_clock = sched_clock;
+ paravirt_set_sched_clock(sched_clock);
#endif
}
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 546d6ecf0a35..fe335d8c1676 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -265,6 +265,7 @@
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define DEBUGCTLMSR_BTINT (1UL << 8)
@@ -628,8 +629,6 @@
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define MSR_IA32_TSCDEADLINE 0x000006e0
-
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index cb9ad6b73973..c14fb80b9a07 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -7,7 +7,6 @@
#include <linux/objtool.h>
#include <asm/alternative.h>
-#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
@@ -33,7 +32,7 @@
/*
* Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
+ * the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4abf110e2243..43992e5c52c2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -15,11 +15,20 @@
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>
+#include <linux/static_call_types.h>
#include <asm/frame.h>
-static inline unsigned long long paravirt_sched_clock(void)
+u64 dummy_steal_clock(int cpu);
+u64 dummy_sched_clock(void);
+
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock);
+
+void paravirt_set_sched_clock(u64 (*func)(void));
+
+static inline u64 paravirt_sched_clock(void)
{
- return PVOP_CALL0(unsigned long long, time.sched_clock);
+ return static_call(pv_sched_clock)();
}
struct static_key;
@@ -33,9 +42,13 @@ bool pv_is_native_vcpu_is_preempted(void);
static inline u64 paravirt_steal_clock(int cpu)
{
- return PVOP_CALL1(u64, time.steal_clock, cpu);
+ return static_call(pv_steal_clock)(cpu);
}
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
@@ -122,7 +135,9 @@ static inline void write_cr0(unsigned long x)
static inline unsigned long read_cr2(void)
{
- return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
+ return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+ "mov %%cr2, %%rax;",
+ ALT_NOT(X86_FEATURE_XENPV));
}
static inline void write_cr2(unsigned long x)
@@ -132,12 +147,14 @@ static inline void write_cr2(unsigned long x)
static inline unsigned long __read_cr3(void)
{
- return PVOP_CALL0(unsigned long, mmu.read_cr3);
+ return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+ "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void write_cr3(unsigned long x)
{
- PVOP_VCALL1(mmu.write_cr3, x);
+ PVOP_ALT_VCALL1(mmu.write_cr3, x,
+ "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void __write_cr4(unsigned long x)
@@ -157,7 +174,7 @@ static inline void halt(void)
static inline void wbinvd(void)
{
- PVOP_VCALL0(cpu.wbinvd);
+ PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
}
static inline u64 paravirt_read_msr(unsigned msr)
@@ -371,22 +388,28 @@ static inline void paravirt_release_p4d(unsigned long pfn)
static inline pte_t __pte(pteval_t val)
{
- return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
+ return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
}
static inline pteval_t pte_val(pte_t pte)
{
- return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
+ return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline pgd_t __pgd(pgdval_t val)
{
- return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
+ return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
}
static inline pgdval_t pgd_val(pgd_t pgd)
{
- return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
+ return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -419,12 +442,15 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline pmd_t __pmd(pmdval_t val)
{
- return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) };
+ return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
}
static inline pmdval_t pmd_val(pmd_t pmd)
{
- return PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd);
+ return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -436,14 +462,16 @@ static inline pud_t __pud(pudval_t val)
{
pudval_t ret;
- ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val);
+ ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
return (pud_t) { ret };
}
static inline pudval_t pud_val(pud_t pud)
{
- return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud);
+ return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void pud_clear(pud_t *pudp)
@@ -462,14 +490,17 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline p4d_t __p4d(p4dval_t val)
{
- p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val);
+ p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV));
return (p4d_t) { ret };
}
static inline p4dval_t p4d_val(p4d_t p4d)
{
- return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d);
+ return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -556,7 +587,9 @@ static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
{
- PVOP_VCALLEE1(lock.queued_spin_unlock, lock);
+ PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock,
+ "movb $0, (%%" _ASM_ARG1 ");",
+ ALT_NOT(X86_FEATURE_PVUNLOCK));
}
static __always_inline void pv_wait(u8 *ptr, u8 val)
@@ -571,7 +604,9 @@ static __always_inline void pv_kick(int cpu)
static __always_inline bool pv_vcpu_is_preempted(long cpu)
{
- return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu);
+ return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu,
+ "xor %%" _ASM_AX ", %%" _ASM_AX ";",
+ ALT_NOT(X86_FEATURE_VCPUPREEMPT));
}
void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock);
@@ -645,17 +680,18 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
#ifdef CONFIG_PARAVIRT_XXL
static inline notrace unsigned long arch_local_save_flags(void)
{
- return PVOP_CALLEE0(unsigned long, irq.save_fl);
+ return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
+ ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace void arch_local_irq_disable(void)
{
- PVOP_VCALLEE0(irq.irq_disable);
+ PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace void arch_local_irq_enable(void)
{
- PVOP_VCALLEE0(irq.irq_enable);
+ PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace unsigned long arch_local_irq_save(void)
@@ -700,84 +736,27 @@ extern void default_banner(void);
.popsection
-#define COND_PUSH(set, mask, reg) \
- .if ((~(set)) & mask); push %reg; .endif
-#define COND_POP(set, mask, reg) \
- .if ((~(set)) & mask); pop %reg; .endif
-
#ifdef CONFIG_X86_64
-
-#define PV_SAVE_REGS(set) \
- COND_PUSH(set, CLBR_RAX, rax); \
- COND_PUSH(set, CLBR_RCX, rcx); \
- COND_PUSH(set, CLBR_RDX, rdx); \
- COND_PUSH(set, CLBR_RSI, rsi); \
- COND_PUSH(set, CLBR_RDI, rdi); \
- COND_PUSH(set, CLBR_R8, r8); \
- COND_PUSH(set, CLBR_R9, r9); \
- COND_PUSH(set, CLBR_R10, r10); \
- COND_PUSH(set, CLBR_R11, r11)
-#define PV_RESTORE_REGS(set) \
- COND_POP(set, CLBR_R11, r11); \
- COND_POP(set, CLBR_R10, r10); \
- COND_POP(set, CLBR_R9, r9); \
- COND_POP(set, CLBR_R8, r8); \
- COND_POP(set, CLBR_RDI, rdi); \
- COND_POP(set, CLBR_RSI, rsi); \
- COND_POP(set, CLBR_RDX, rdx); \
- COND_POP(set, CLBR_RCX, rcx); \
- COND_POP(set, CLBR_RAX, rax)
+#ifdef CONFIG_PARAVIRT_XXL
#define PARA_PATCH(off) ((off) / 8)
#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
-#else
-#define PV_SAVE_REGS(set) \
- COND_PUSH(set, CLBR_EAX, eax); \
- COND_PUSH(set, CLBR_EDI, edi); \
- COND_PUSH(set, CLBR_ECX, ecx); \
- COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set) \
- COND_POP(set, CLBR_EDX, edx); \
- COND_POP(set, CLBR_ECX, ecx); \
- COND_POP(set, CLBR_EDI, edi); \
- COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off) ((off) / 4)
-#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr) *%cs:addr
-#endif
-#ifdef CONFIG_PARAVIRT_XXL
#define INTERRUPT_RETURN \
- PARA_SITE(PARA_PATCH(PV_CPU_iret), \
- ANNOTATE_RETPOLINE_SAFE; \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
-
-#define DISABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#endif
+ ANNOTATE_RETPOLINE_SAFE; \
+ ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \
+ X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
#ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+.macro PARA_IRQ_save_fl
+ PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
+ ANNOTATE_RETPOLINE_SAFE;
+ call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);)
+.endm
+
+#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \
+ ALT_NOT(X86_FEATURE_XENPV)
#endif
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
@@ -800,5 +779,11 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
}
#endif
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index de87087d3bde..ae692c3194e9 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -3,7 +3,6 @@
#define _ASM_X86_PARAVIRT_TYPES_H
/* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_NONE 0
#define CLBR_EAX (1 << 0)
#define CLBR_ECX (1 << 1)
#define CLBR_EDX (1 << 2)
@@ -15,7 +14,6 @@
#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
-#define CLBR_SCRATCH (0)
#else
#define CLBR_RAX CLBR_EAX
#define CLBR_RCX CLBR_ECX
@@ -32,12 +30,9 @@
#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
CLBR_RCX | CLBR_R8 | CLBR_R9)
#define CLBR_RET_REG (CLBR_RAX)
-#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
#endif /* X86_64 */
-#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
-
#ifndef __ASSEMBLY__
#include <asm/desc_defs.h>
@@ -73,19 +68,6 @@ struct pv_info {
const char *name;
};
-struct pv_init_ops {
- /*
- * Patch may replace one of the defined code sequences with
- * arbitrary code, subject to the same register constraints.
- * This generally means the code is not free to clobber any
- * registers other than EAX. The patch function should return
- * the number of bytes of code generated, as we nop pad the
- * rest in generic code.
- */
- unsigned (*patch)(u8 type, void *insn_buff,
- unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
#ifdef CONFIG_PARAVIRT_XXL
struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
@@ -95,11 +77,6 @@ struct pv_lazy_ops {
} __no_randomize_layout;
#endif
-struct pv_time_ops {
- unsigned long long (*sched_clock)(void);
- unsigned long long (*steal_clock)(int cpu);
-} __no_randomize_layout;
-
struct pv_cpu_ops {
/* hooks for various privileged instructions */
void (*io_delay)(void);
@@ -156,10 +133,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
- /* Normal iret. Jump to this with the standard iret stack
- frame set up. */
- void (*iret)(void);
-
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
#endif
@@ -290,8 +263,6 @@ struct pv_lock_ops {
* number for each function using the offset which we use to indicate
* what to patch. */
struct paravirt_patch_template {
- struct pv_init_ops init;
- struct pv_time_ops time;
struct pv_cpu_ops cpu;
struct pv_irq_ops irq;
struct pv_mmu_ops mmu;
@@ -300,6 +271,7 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct paravirt_patch_template pv_ops;
+extern void (*paravirt_iret)(void);
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@ -331,11 +303,7 @@ extern struct paravirt_patch_template pv_ops;
/* Simple instruction patching code. */
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len);
-unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len);
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end);
-
-unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len);
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len);
int paravirt_disable_iospace(void);
@@ -371,7 +339,7 @@ int paravirt_disable_iospace(void);
* on the stack. All caller-save registers (eax,edx,ecx) are expected
* to be modified (either clobbered or used for return values).
* X86_64, on the other hand, already specifies a register-based calling
- * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
+ * conventions, returning at %rax, with parameters going on %rdi, %rsi,
* %rdx, and %rcx. Note that for this reason, x86_64 does not need any
* special handling for dealing with 4 arguments, unlike i386.
* However, x86_64 also have to clobber all caller saved registers, which
@@ -414,11 +382,9 @@ int paravirt_disable_iospace(void);
* makes sure the incoming and outgoing types are always correct.
*/
#ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS \
+#define PVOP_CALL_ARGS \
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
@@ -434,12 +400,10 @@ int paravirt_disable_iospace(void);
#define VEXTRA_CLOBBERS
#else /* CONFIG_X86_64 */
/* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS \
+#define PVOP_CALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
__edx = __edx, __ecx = __ecx, __eax = __eax;
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
@@ -464,152 +428,138 @@ int paravirt_disable_iospace(void);
#define PVOP_TEST_NULL(op) ((void)pv_ops.op)
#endif
-#define PVOP_RETMASK(rettype) \
+#define PVOP_RETVAL(rettype) \
({ unsigned long __mask = ~0UL; \
+ BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \
switch (sizeof(rettype)) { \
case 1: __mask = 0xffUL; break; \
case 2: __mask = 0xffffUL; break; \
case 4: __mask = 0xffffffffUL; break; \
default: break; \
} \
- __mask; \
+ __mask & __eax; \
})
-#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
- pre, post, ...) \
+#define ____PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...) \
({ \
- rettype __ret; \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- /* This is 32-bit specific, but is okay in 64-bit */ \
- /* since this condition will never hold */ \
- if (sizeof(rettype) > sizeof(unsigned long)) { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)((((u64)__edx) << 32) | __eax); \
- } else { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \
- } \
- __ret; \
+ asm volatile(paravirt_alt(PARAVIRT_CALL) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
+ : paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__ \
+ : "memory", "cc" extra_clbr); \
+ ret; \
})
-#define __PVOP_CALL(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
- EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
-
-#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
- PVOP_CALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
-
-
-#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
+#define ____PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \
+ extra_clbr, ...) \
({ \
- PVOP_VCALL_ARGS; \
+ PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
+ asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL), \
+ alt, cond) \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
+ ret; \
})
-#define __PVOP_VCALL(op, pre, post, ...) \
- ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
- VEXTRA_CLOBBERS, \
- pre, post, ##__VA_ARGS__)
+#define __PVOP_CALL(rettype, op, ...) \
+ ____PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY, \
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \
+ ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \
+ ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, ...) \
+ ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG, \
+ PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \
+ ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \
+ CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
+#define __PVOP_VCALL(op, ...) \
+ (void)____PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
+ VEXTRA_CLOBBERS, ##__VA_ARGS__)
+
+#define __PVOP_ALT_VCALL(op, alt, cond, ...) \
+ (void)____PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY, \
+ PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \
+ ##__VA_ARGS__)
-#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
- ____PVOP_VCALL(op.func, CLBR_RET_REG, \
- PVOP_VCALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
+#define __PVOP_VCALLEESAVE(op, ...) \
+ (void)____PVOP_CALL(, op.func, CLBR_RET_REG, \
+ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \
+ (void)____PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \
+ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
#define PVOP_CALL0(rettype, op) \
- __PVOP_CALL(rettype, op, "", "")
+ __PVOP_CALL(rettype, op)
#define PVOP_VCALL0(op) \
- __PVOP_VCALL(op, "", "")
+ __PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond) \
+ __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond) \
+ __PVOP_ALT_VCALL(op, alt, cond)
#define PVOP_CALLEE0(rettype, op) \
- __PVOP_CALLEESAVE(rettype, op, "", "")
+ __PVOP_CALLEESAVE(rettype, op)
#define PVOP_VCALLEE0(op) \
- __PVOP_VCALLEESAVE(op, "", "")
+ __PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \
+ __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VCALLEE0(op, alt, cond) \
+ __PVOP_ALT_VCALLEESAVE(op, alt, cond)
#define PVOP_CALL1(rettype, op, arg1) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1))
#define PVOP_VCALL1(op, arg1) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \
+ __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1))
#define PVOP_CALLEE1(rettype, op, arg1) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1))
#define PVOP_VCALLEE1(op, arg1) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \
+ __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \
+ __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1))
#define PVOP_CALL2(rettype, op, arg1, arg2) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
#define PVOP_VCALL2(op, arg1, arg2) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
-#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-#define PVOP_VCALLEE2(op, arg1, arg2) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
#define PVOP_VCALL3(op, arg1, arg2, arg3) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
-/* This is the only difference in x86_64. We can make it much simpler */
-#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
__PVOP_CALL(rettype, op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
- PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
-#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
- "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
-#else
-#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
- __PVOP_CALL(rettype, op, "", "", \
PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, "", "", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-#endif
/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a02c67291cfc..b1099f2d9800 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1244,7 +1244,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
- * dst - pointer to pgd range anwhere on a pgd page
+ * dst - pointer to pgd range anywhere on a pgd page
* src - ""
* count - the number of pgds to copy.
*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f1b9ed5efaa9..185142b84ebe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -314,11 +314,6 @@ struct x86_hw_tss {
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
-
- /*
- * We store cpu_current_top_of_stack in sp1 so it's always accessible.
- * Linux does not use ring 1, so sp1 is not otherwise needed.
- */
u64 sp1;
/*
@@ -426,12 +421,7 @@ struct irq_stack {
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
-#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
-#else
-/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
-#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
-#endif
#ifdef CONFIG_X86_64
struct fixed_percpu_data {
@@ -527,7 +517,7 @@ struct thread_struct {
struct io_bitmap *io_bitmap;
/*
- * IOPL. Priviledge level dependent I/O permission which is
+ * IOPL. Privilege level dependent I/O permission which is
* emulated via the I/O bitmap to prevent user space from disabling
* interrupts.
*/
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index b6a9d51d1d79..8c5d1910a848 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -4,6 +4,8 @@
#include <asm/ldt.h>
+struct task_struct;
+
/* misc architecture specific prototypes */
void syscall_init(void);
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 4352f08bfbb5..43fa081a1adb 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -8,8 +8,8 @@
/*
* The set_memory_* API can be used to change various attributes of a virtual
* address range. The attributes include:
- * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
- * Executability : eXeutable, NoteXecutable
+ * Cacheability : UnCached, WriteCombining, WriteThrough, WriteBack
+ * Executability : eXecutable, NoteXecutable
* Read/Write : ReadOnly, ReadWrite
* Presence : NotPresent
* Encryption : Encrypted, Decrypted
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 389d851a02c4..a12458a7a8d4 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -130,11 +130,6 @@ void *extend_brk(size_t size, size_t align);
: : "i" (sz)); \
}
-/* Helper for reserving space for arrays of things */
-#define RESERVE_BRK_ARRAY(type, name, entries) \
- type *name; \
- RESERVE_BRK(name, sizeof(type) * entries)
-
extern void probe_roms(void);
#ifdef __i386__
diff --git a/arch/x86/kernel/cpu/sgx/arch.h b/arch/x86/include/asm/sgx.h
index dd7602c44c72..9c31e0ebc55b 100644
--- a/arch/x86/kernel/cpu/sgx/arch.h
+++ b/arch/x86/include/asm/sgx.h
@@ -2,15 +2,20 @@
/**
* Copyright(c) 2016-20 Intel Corporation.
*
- * Contains data structures defined by the SGX architecture. Data structures
- * defined by the Linux software stack should not be placed here.
+ * Intel Software Guard Extensions (SGX) support.
*/
-#ifndef _ASM_X86_SGX_ARCH_H
-#define _ASM_X86_SGX_ARCH_H
+#ifndef _ASM_X86_SGX_H
+#define _ASM_X86_SGX_H
#include <linux/bits.h>
#include <linux/types.h>
+/*
+ * This file contains both data structures defined by SGX architecture and Linux
+ * defined software data structures and functions. The two should not be mixed
+ * together for better readability. The architectural definitions come first.
+ */
+
/* The SGX specific CPUID function. */
#define SGX_CPUID 0x12
/* EPC enumeration. */
@@ -22,16 +27,36 @@
/* The bitmask for the EPC section type. */
#define SGX_CPUID_EPC_MASK GENMASK(3, 0)
+enum sgx_encls_function {
+ ECREATE = 0x00,
+ EADD = 0x01,
+ EINIT = 0x02,
+ EREMOVE = 0x03,
+ EDGBRD = 0x04,
+ EDGBWR = 0x05,
+ EEXTEND = 0x06,
+ ELDU = 0x08,
+ EBLOCK = 0x09,
+ EPA = 0x0A,
+ EWB = 0x0B,
+ ETRACK = 0x0C,
+ EAUG = 0x0D,
+ EMODPR = 0x0E,
+ EMODT = 0x0F,
+};
+
/**
* enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
* %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
* been completed yet.
+ * %SGX_CHILD_PRESENT: SECS has child pages present in the EPC.
* %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
* public key does not match IA32_SGXLEPUBKEYHASH.
* %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
*/
enum sgx_return_code {
SGX_NOT_TRACKED = 11,
+ SGX_CHILD_PRESENT = 13,
SGX_INVALID_EINITTOKEN = 16,
SGX_UNMASKED_EVENT = 128,
};
@@ -271,7 +296,7 @@ struct sgx_pcmd {
* @header1: constant byte string
* @vendor: must be either 0x0000 or 0x8086
* @date: YYYYMMDD in BCD
- * @header2: costant byte string
+ * @header2: constant byte string
* @swdefined: software defined value
*/
struct sgx_sigstruct_header {
@@ -335,4 +360,19 @@ struct sgx_sigstruct {
#define SGX_LAUNCH_TOKEN_SIZE 304
-#endif /* _ASM_X86_SGX_ARCH_H */
+/*
+ * Do not put any hardware-defined SGX structure representations below this
+ * comment!
+ */
+
+#ifdef CONFIG_X86_SGX_KVM
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+ int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+ void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
+int sgx_set_attribute(unsigned long *allowed_attributes,
+ unsigned int attribute_fd);
+
+#endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 0bc9b0895f33..d17b39893b79 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -11,6 +11,7 @@
#include <asm/nops.h>
#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
/* "Raw" instruction opcodes */
#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
@@ -18,8 +19,6 @@
#ifdef __ASSEMBLY__
-#include <asm/alternative-asm.h>
-
#ifdef CONFIG_X86_SMAP
#define ASM_CLAC \
@@ -37,8 +36,6 @@
#else /* __ASSEMBLY__ */
-#include <asm/alternative.h>
-
#ifdef CONFIG_X86_SMAP
static __always_inline void clac(void)
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 9f69cc497f4b..b5f0d2ff47e4 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -71,12 +71,7 @@ static inline void update_task_stack(struct task_struct *task)
else
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
- /*
- * x86-64 updates x86_tss.sp1 via cpu_current_top_of_stack. That
- * doesn't work on x86-32 because sp1 and
- * cpu_current_top_of_stack have different values (because of
- * the non-zero stack-padding on 32bit).
- */
+ /* Xen PV enters the kernel on the thread stack. */
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
#endif
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index a84333adeef2..80c08c7d5e72 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -80,6 +80,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
}
#define __COND_SYSCALL(abi, name) \
+ __weak long __##abi##_##name(const struct pt_regs *__unused); \
__weak long __##abi##_##name(const struct pt_regs *__unused) \
{ \
return sys_ni_syscall(); \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 06b740bae431..de406d93b515 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -197,13 +197,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#endif
}
-#else /* !__ASSEMBLY__ */
-
-#ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
-#endif
-
-#endif
+#endif /* !__ASSEMBLY__ */
/*
* Thread-synchronous status.
diff --git a/arch/x86/include/asm/uv/uv_geo.h b/arch/x86/include/asm/uv/uv_geo.h
index f241451035fb..027a9258dbca 100644
--- a/arch/x86/include/asm/uv/uv_geo.h
+++ b/arch/x86/include/asm/uv/uv_geo.h
@@ -10,7 +10,7 @@
#ifndef _ASM_UV_GEO_H
#define _ASM_UV_GEO_H
-/* Type declaractions */
+/* Type declarations */
/* Size of a geoid_s structure (must be before decl. of geoid_u) */
#define GEOID_SIZE 8
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 5002f52be332..d3e3197917be 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -353,7 +353,7 @@ union uvh_apicid {
*
* Note there are NO leds on a UV system. This register is only
* used by the system controller to monitor system-wide operation.
- * There are 64 regs per node. With Nahelem cpus (2 cores per node,
+ * There are 64 regs per node. With Nehalem cpus (2 cores per node,
* 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
* a node.
*
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 600a141c8805..b25d3f82c2f3 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -234,7 +234,7 @@ struct boot_params {
* handling of page tables.
*
* These enums should only ever be used by x86 code, and the code that uses
- * it should be well contained and compartamentalized.
+ * it should be well contained and compartmentalized.
*
* KVM and Xen HVM do not have a subarch as these are expected to follow
* standard x86 boot entries. If there is a genuine need for "hypervisor" type
@@ -252,7 +252,7 @@ struct boot_params {
* @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,
* which start at asm startup_xen() entry point and later jump to the C
* xen_start_kernel() entry point. Both domU and dom0 type of guests are
- * currently supportd through this PV boot path.
+ * currently supported through this PV boot path.
* @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform
* systems which do not have the PCI legacy interfaces.
* @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index d95d080b30e3..0007ba077c0c 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -24,6 +24,7 @@
#define DR_TRAP3 (0x8) /* db3 */
#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
+#define DR_BUS_LOCK (0x800) /* bus_lock */
#define DR_STEP (0x4000) /* single-step */
#define DR_SWITCH (0x8000) /* task switch */
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index b3d0664fadc9..ac83e25bbf37 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -12,7 +12,7 @@
* The msqid64_ds structure for x86 architecture with x32 ABI.
*
* On x86-32 and x86-64 we can just use the generic definition, but
- * x32 uses the same binary layout as x86_64, which is differnet
+ * x32 uses the same binary layout as x86_64, which is different
* from other 32-bit architectures.
*/
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 9034f3007c4e..9690d6899ad9 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -152,7 +152,7 @@ struct sgx_enclave_run {
* Most exceptions reported on ENCLU, including those that occur within the
* enclave, are fixed up and reported synchronously instead of being delivered
* via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
- * never fixed up and are always delivered via standard signals. On synchrously
+ * never fixed up and are always delivered via standard signals. On synchronously
* reported exceptions, -EFAULT is returned and details about the exception are
* recorded in @run.exception, the optional sgx_enclave_exception struct.
*
diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index f0305dc660c9..fce18eaa070c 100644
--- a/arch/x86/include/uapi/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
@@ -9,7 +9,7 @@
* The shmid64_ds structure for x86 architecture with x32 ABI.
*
* On x86-32 and x86-64 we can just use the generic definition, but
- * x32 uses the same binary layout as x86_64, which is differnet
+ * x32 uses the same binary layout as x86_64, which is different
* from other 32-bit architectures.
*/
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 844d60eb1882..d0d9b331d3a1 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -139,7 +139,7 @@ struct _fpstate_32 {
* The 64-bit FPU frame. (FXSAVE format and later)
*
* Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is
- * larger: 'struct _xstate'. Note that 'struct _xstate' embedds
+ * larger: 'struct _xstate'. Note that 'struct _xstate' embeds
* 'struct _fpstate' so that you can always assume the _fpstate portion
* exists so that you can check the magic value.
*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ddf08351f0b..0704c2a94272 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -35,7 +35,6 @@ KASAN_SANITIZE_sev-es.o := n
KCSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD_test_nx.o := y
-OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
ifdef CONFIG_FRAME_POINTER
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -121,7 +120,7 @@ obj-$(CONFIG_AMD_NB) += amd_nb.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 14cd3186dc77..e90310cbe73a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -830,7 +830,7 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
EXPORT_SYMBOL(acpi_unregister_ioapic);
/**
- * acpi_ioapic_registered - Check whether IOAPIC assoicatied with @gsi_base
+ * acpi_ioapic_registered - Check whether IOAPIC associated with @gsi_base
* has been registered
* @handle: ACPI handle of the IOAPIC device
* @gsi_base: GSI base associated with the IOAPIC
@@ -1656,7 +1656,7 @@ static int __init parse_acpi(char *arg)
else if (strcmp(arg, "noirq") == 0) {
acpi_noirq_set();
}
- /* "acpi=copy_dsdt" copys DSDT */
+ /* "acpi=copy_dsdt" copies DSDT */
else if (strcmp(arg, "copy_dsdt") == 0) {
acpi_gbl_copy_dsdt_locally = 1;
}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index cc1fea76aab0..3f85fcae450c 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -41,7 +41,7 @@ unsigned long acpi_get_wakeup_address(void)
* x86_acpi_enter_sleep_state - enter sleep state
* @state: Sleep state to enter.
*
- * Wrapper around acpi_enter_sleep_state() to be called by assmebly.
+ * Wrapper around acpi_enter_sleep_state() to be called by assembly.
*/
asmlinkage acpi_status __visible x86_acpi_enter_sleep_state(u8 state)
{
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8d778e46725d..f810e6fececd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -28,6 +28,7 @@
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/fixmap.h>
+#include <asm/paravirt.h>
int __read_mostly alternatives_patched;
@@ -388,21 +389,31 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
*/
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
+ /* Mask away "NOT" flag bit for feature to test. */
+ u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
BUG_ON(a->instrlen > sizeof(insn_buff));
- BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
- if (!boot_cpu_has(a->cpuid)) {
+ BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);
+
+ /*
+ * Patch if either:
+ * - feature is present
+ * - feature not present but ALTINSTR_FLAG_INV is set to mean,
+ * patch if feature is *NOT* present.
+ */
+ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
if (a->padlen > 1)
optimize_nops(a, instr);
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
- a->cpuid >> 5,
- a->cpuid & 0x1f,
+ DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+ (a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
+ feature >> 5,
+ feature & 0x1f,
instr, instr, a->instrlen,
replacement, a->replacementlen, a->padlen);
@@ -605,7 +616,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
BUG_ON(p->len > MAX_PATCH_LEN);
/* prep the buffer with the original instructions */
memcpy(insn_buff, p->instr, p->len);
- used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
+ used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
@@ -723,6 +734,33 @@ void __init alternative_instructions(void)
* patching.
*/
+ /*
+ * Paravirt patching and alternative patching can be combined to
+ * replace a function call with a short direct code sequence (e.g.
+ * by setting a constant return value instead of doing that in an
+ * external function).
+ * In order to make this work the following sequence is required:
+ * 1. set (artificial) features depending on used paravirt
+ * functions which can later influence alternative patching
+ * 2. apply paravirt patching (generally replacing an indirect
+ * function call with a direct one)
+ * 3. apply alternative patching (e.g. replacing a direct function
+ * call with a custom code sequence)
+ * Doing paravirt patching after alternative patching would clobber
+ * the optimization of the custom code with a function call again.
+ */
+ paravirt_set_cap();
+
+ /*
+ * First patch paravirt functions, such that we overwrite the indirect
+ * call with the direct call.
+ */
+ apply_paravirt(__parainstructions, __parainstructions_end);
+
+ /*
+ * Then patch alternatives, such that those paravirt calls that are in
+ * alternatives can be overwritten by their immediate fragments.
+ */
apply_alternatives(__alt_instructions, __alt_instructions_end);
#ifdef CONFIG_SMP
@@ -741,8 +779,6 @@ void __init alternative_instructions(void)
}
#endif
- apply_paravirt(__parainstructions, __parainstructions_end);
-
restart_nmi();
alternatives_patched = 1;
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index b4396952c9a6..09083094eb57 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Shared support code for AMD K8 northbridges and derivates.
+ * Shared support code for AMD K8 northbridges and derivatives.
* Copyright 2006 Andi Kleen, SUSE Labs.
*/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4f26700f314d..4a39fb429f15 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -619,7 +619,7 @@ static void setup_APIC_timer(void)
if (this_cpu_has(X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
- /* Make LAPIC timer preferrable over percpu HPET */
+ /* Make LAPIC timer preferable over percpu HPET */
lapic_clockevent.rating = 150;
}
@@ -666,7 +666,7 @@ void lapic_update_tsc_freq(void)
* In this functions we calibrate APIC bus clocks to the external timer.
*
* We want to do the calibration only once since we want to have local timer
- * irqs syncron. CPUs connected by the same APIC bus have the very same bus
+ * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
* frequency.
*
* This was previously done by reading the PIT/HPET and waiting for a wrap
@@ -1532,7 +1532,7 @@ static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
* Most probably by now the CPU has serviced that pending interrupt and it
* might not have done the ack_APIC_irq() because it thought, interrupt
* came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
- * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
+ * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
* a vector might get locked. It was noticed for timer irq (vector
* 0x31). Issue an extra EOI to clear ISR.
*
@@ -1657,7 +1657,7 @@ static void setup_local_APIC(void)
*/
/*
* Actually disabling the focus CPU check just makes the hang less
- * frequent as it makes the interrupt distributon model be more
+ * frequent as it makes the interrupt distribution model be more
* like LRU than MRU (the short-term load is more even across CPUs).
*/
@@ -1875,7 +1875,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
/*
* Without IR, all CPUs can be addressed by IOAPIC/MSI only
- * in physical mode, and CPUs with an APIC ID that cannnot
+ * in physical mode, and CPUs with an APIC ID that cannot
* be addressed must not be brought online.
*/
x2apic_set_max_apicid(apic_limit);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 73ff4dd426a8..d5c691a3208b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -928,7 +928,7 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
/*
* setup_IO_APIC_irqs() programs all legacy IRQs with default trigger
- * and polarity attirbutes. So allow the first user to reprogram the
+ * and polarity attributes. So allow the first user to reprogram the
* pin with real trigger and polarity attributes.
*/
if (irq < nr_legacy_irqs() && data->count == 1) {
@@ -994,7 +994,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
/*
* Legacy ISA IRQ has already been allocated, just add pin to
- * the pin list assoicated with this IRQ and program the IOAPIC
+ * the pin list associated with this IRQ and program the IOAPIC
* entry. The IOAPIC entry
*/
if (irq_data && irq_data->parent_data) {
@@ -1752,7 +1752,7 @@ static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
* with masking the ioapic entry and then polling until
* Remote IRR was clear before reprogramming the
* ioapic I don't trust the Remote IRR bit to be
- * completey accurate.
+ * completely accurate.
*
* However there appears to be no other way to plug
* this race, so if the Remote IRR bit is not
@@ -1830,7 +1830,7 @@ static void ioapic_ack_level(struct irq_data *irq_data)
/*
* Tail end of clearing remote IRR bit (either by delivering the EOI
* message via io-apic EOI register write or simulating it using
- * mask+edge followed by unnask+level logic) manually when the
+ * mask+edge followed by unmask+level logic) manually when the
* level triggered interrupt is seen as the edge triggered interrupt
* at the cpu.
*/
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3c9c7492252f..6dbdc7c22bb7 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -543,6 +543,14 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
return -ENOSYS;
+ /*
+ * Catch any attempt to touch the cascade interrupt on a PIC
+ * equipped system.
+ */
+ if (WARN_ON_ONCE(info->flags & X86_IRQ_ALLOC_LEGACY &&
+ virq == PIC_CASCADE_IR))
+ return -EINVAL;
+
for (i = 0; i < nr_irqs; i++) {
irqd = irq_domain_get_irq_data(domain, virq + i);
BUG_ON(!irqd);
@@ -745,6 +753,11 @@ void __init lapic_assign_system_vectors(void)
/* Mark the preallocated legacy interrupts */
for (i = 0; i < nr_legacy_irqs(); i++) {
+ /*
+ * Don't touch the cascade interrupt. It's unusable
+ * on PIC equipped machines. See the large comment
+ * in the IO/APIC code.
+ */
if (i != PIC_CASCADE_IR)
irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i));
}
@@ -1045,7 +1058,7 @@ void irq_force_complete_move(struct irq_desc *desc)
*
* But in case of cpu hotplug this should be a non issue
* because if the affinity update happens right before all
- * cpus rendevouz in stop machine, there is no way that the
+ * cpus rendezvous in stop machine, there is no way that the
* interrupt can be blocked on the target cpu because all cpus
* loops first with interrupts enabled in stop machine, so the
* old vector is not yet cleaned up when the interrupt fires.
@@ -1054,7 +1067,7 @@ void irq_force_complete_move(struct irq_desc *desc)
* of the interrupt on the apic/system bus would be delayed
* beyond the point where the target cpu disables interrupts
* in stop machine. I doubt that it can happen, but at least
- * there is a theroretical chance. Virtualization might be
+ * there is a theoretical chance. Virtualization might be
* able to expose this, but AFAICT the IOAPIC emulation is not
* as stupid as the real hardware.
*
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 52bc217ca8c3..f5a48e66e4f5 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -369,6 +369,15 @@ static int __init early_get_arch_type(void)
return ret;
}
+/* UV system found, check which APIC MODE BIOS already selected */
+static void __init early_set_apic_mode(void)
+{
+ if (x2apic_enabled())
+ uv_system_type = UV_X2APIC;
+ else
+ uv_system_type = UV_LEGACY_APIC;
+}
+
static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id)
{
/* Save OEM_ID passed from ACPI MADT */
@@ -404,11 +413,12 @@ static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id)
else
uv_hubless_system |= 0x8;
- /* Copy APIC type */
+ /* Copy OEM Table ID */
uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n",
oem_id, oem_table_id, uv_system_type, uv_hubless_system);
+
return 0;
}
@@ -453,6 +463,7 @@ static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id)
early_set_hub_type();
/* Other UV setup functions */
+ early_set_apic_mode();
early_get_pnodeid();
early_get_apic_socketid_shift();
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
@@ -472,29 +483,14 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
if (uv_set_system_type(_oem_id, _oem_table_id) == 0)
return 0;
- /* Save and Decode OEM Table ID */
+ /* Save for display of the OEM Table ID */
uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
- /* This is the most common hardware variant, x2apic mode */
- if (!strcmp(oem_table_id, "UVX"))
- uv_system_type = UV_X2APIC;
-
- /* Only used for very small systems, usually 1 chassis, legacy mode */
- else if (!strcmp(oem_table_id, "UVL"))
- uv_system_type = UV_LEGACY_APIC;
-
- else
- goto badbios;
-
pr_info("UV: OEM IDs %s/%s, System/UVType %d/0x%x, HUB RevID %d\n",
oem_id, oem_table_id, uv_system_type, is_uv(UV_ANY),
uv_min_hub_revision_id);
return 0;
-
-badbios:
- pr_err("UV: UVarchtype:%s not supported\n", uv_archtype);
- BUG();
}
enum uv_system_type get_uv_system_type(void)
@@ -1671,6 +1667,9 @@ static __init int uv_system_init_hubless(void)
if (rc < 0)
return rc;
+ /* Set section block size for current node memory */
+ set_block_size();
+
/* Create user access node */
if (rc >= 0)
uv_setup_proc_files(1);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 660270359d39..241dda687eb9 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -94,7 +94,7 @@
* Remove APM dependencies in arch/i386/kernel/process.c
* Remove APM dependencies in drivers/char/sysrq.c
* Reset time across standby.
- * Allow more inititialisation on SMP.
+ * Allow more initialisation on SMP.
* Remove CONFIG_APM_POWER_OFF and make it boot time
* configurable (default on).
* Make debug only a boot time parameter (remove APM_DEBUG).
@@ -766,7 +766,7 @@ static int apm_driver_version(u_short *val)
* not cleared until it is acknowledged.
*
* Additional information is returned in the info pointer, providing
- * that APM 1.2 is in use. If no messges are pending the value 0x80
+ * that APM 1.2 is in use. If no messages are pending the value 0x80
* is returned (No power management events pending).
*/
static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
@@ -1025,7 +1025,7 @@ static int apm_enable_power_management(int enable)
* status which gives the rough battery status, and current power
* source. The bat value returned give an estimate as a percentage
* of life and a status value for the battery. The estimated life
- * if reported is a lifetime in secodnds/minutes at current powwer
+ * if reported is a lifetime in seconds/minutes at current power
* consumption.
*/
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 60b9f42ce3c1..ecd3fd6993d1 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -61,13 +61,6 @@ static void __used common(void)
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
#endif
-#ifdef CONFIG_PARAVIRT_XXL
- BLANK();
- OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable);
- OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable);
- OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret);
-#endif
-
#ifdef CONFIG_XEN
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 3ca9be482a9e..d66af2950e06 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -877,7 +877,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
struct _cpuid4_info_regs *base)
{
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cpu_cacheinfo *this_cpu_ci;
struct cacheinfo *this_leaf;
int i, sibling;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ab640abe26b6..99e1656b326e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -482,7 +482,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
if (pk)
pk->pkru = init_pkru_value;
/*
- * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
+ * Setting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
* cpuid bit to be set. We need to ensure that we
* update that bit in this CPU's "cpu_info".
*/
@@ -1330,7 +1330,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_set_bug_bits(c);
- cpu_set_core_cap_bits(c);
+ sld_setup(c);
fpu__init_system(c);
@@ -1404,7 +1404,7 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
* where GS is unused by the prev and next threads.
*
* Since neither vendor documents this anywhere that I can see,
- * detect it directly instead of hardcoding the choice by
+ * detect it directly instead of hard-coding the choice by
* vendor.
*
* I've designated AMD's behavior as the "bug" because it's
@@ -1748,6 +1748,8 @@ DEFINE_PER_CPU(bool, hardirq_stack_inuse);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
+
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 42af31b64c2c..defda61f372d 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -72,6 +72,9 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW },
{ X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES },
{ X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA },
+ { X86_FEATURE_SGX_LC, X86_FEATURE_SGX },
+ { X86_FEATURE_SGX1, X86_FEATURE_SGX },
+ { X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
{}
};
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 1d9b8aaea06c..7227c15299d0 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -291,7 +291,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
mark_tsc_unstable("cyrix 5510/5520 detected");
}
#endif
- c->x86_cache_size = 16; /* Yep 16K integrated cache thats it */
+ c->x86_cache_size = 16; /* Yep 16K integrated cache that's it */
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index 3b1b01f2b248..da696eb4821a 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -93,15 +93,9 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
}
#endif /* CONFIG_X86_VMX_FEATURE_NAMES */
-static void clear_sgx_caps(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_SGX);
- setup_clear_cpu_cap(X86_FEATURE_SGX_LC);
-}
-
static int __init nosgx(char *str)
{
- clear_sgx_caps();
+ setup_clear_cpu_cap(X86_FEATURE_SGX);
return 0;
}
@@ -110,23 +104,30 @@ early_param("nosgx", nosgx);
void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
{
+ bool enable_sgx_kvm = false, enable_sgx_driver = false;
bool tboot = tboot_enabled();
- bool enable_sgx;
+ bool enable_vmx;
u64 msr;
if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) {
clear_cpu_cap(c, X86_FEATURE_VMX);
- clear_sgx_caps();
+ clear_cpu_cap(c, X86_FEATURE_SGX);
return;
}
- /*
- * Enable SGX if and only if the kernel supports SGX and Launch Control
- * is supported, i.e. disable SGX if the LE hash MSRs can't be written.
- */
- enable_sgx = cpu_has(c, X86_FEATURE_SGX) &&
- cpu_has(c, X86_FEATURE_SGX_LC) &&
- IS_ENABLED(CONFIG_X86_SGX);
+ enable_vmx = cpu_has(c, X86_FEATURE_VMX) &&
+ IS_ENABLED(CONFIG_KVM_INTEL);
+
+ if (cpu_has(c, X86_FEATURE_SGX) && IS_ENABLED(CONFIG_X86_SGX)) {
+ /*
+ * Separate out SGX driver enabling from KVM. This allows KVM
+ * guests to use SGX even if the kernel SGX driver refuses to
+ * use it. This happens if flexible Launch Control is not
+ * available.
+ */
+ enable_sgx_driver = cpu_has(c, X86_FEATURE_SGX_LC);
+ enable_sgx_kvm = enable_vmx && IS_ENABLED(CONFIG_X86_SGX_KVM);
+ }
if (msr & FEAT_CTL_LOCKED)
goto update_caps;
@@ -142,15 +143,18 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
* i.e. KVM is enabled, to avoid unnecessarily adding an attack vector
* for the kernel, e.g. using VMX to hide malicious code.
*/
- if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) {
+ if (enable_vmx) {
msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
if (tboot)
msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX;
}
- if (enable_sgx)
- msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED;
+ if (enable_sgx_kvm || enable_sgx_driver) {
+ msr |= FEAT_CTL_SGX_ENABLED;
+ if (enable_sgx_driver)
+ msr |= FEAT_CTL_SGX_LC_ENABLED;
+ }
wrmsrl(MSR_IA32_FEAT_CTL, msr);
@@ -173,10 +177,29 @@ update_caps:
}
update_sgx:
- if (!(msr & FEAT_CTL_SGX_ENABLED) ||
- !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) {
- if (enable_sgx)
- pr_err_once("SGX disabled by BIOS\n");
- clear_sgx_caps();
+ if (!(msr & FEAT_CTL_SGX_ENABLED)) {
+ if (enable_sgx_kvm || enable_sgx_driver)
+ pr_err_once("SGX disabled by BIOS.\n");
+ clear_cpu_cap(c, X86_FEATURE_SGX);
+ return;
+ }
+
+ /*
+ * VMX feature bit may be cleared due to being disabled in BIOS,
+ * in which case SGX virtualization cannot be supported either.
+ */
+ if (!cpu_has(c, X86_FEATURE_VMX) && enable_sgx_kvm) {
+ pr_err_once("SGX virtualization disabled due to lack of VMX.\n");
+ enable_sgx_kvm = 0;
+ }
+
+ if (!(msr & FEAT_CTL_SGX_LC_ENABLED) && enable_sgx_driver) {
+ if (!enable_sgx_kvm) {
+ pr_err_once("SGX Launch Control is locked. Disable SGX.\n");
+ clear_cpu_cap(c, X86_FEATURE_SGX);
+ } else {
+ pr_err_once("SGX Launch Control is locked. Support SGX virtualization only.\n");
+ clear_cpu_cap(c, X86_FEATURE_SGX_LC);
+ }
}
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0e422a544835..fe0bec14d7ec 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -44,9 +44,9 @@ enum split_lock_detect_state {
};
/*
- * Default to sld_off because most systems do not support split lock detection
- * split_lock_setup() will switch this to sld_warn on systems that support
- * split lock detect, unless there is a command line override.
+ * Default to sld_off because most systems do not support split lock detection.
+ * sld_state_setup() will switch this to sld_warn on systems that support
+ * split lock/bus lock detect, unless there is a command line override.
*/
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;
@@ -301,7 +301,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* The operating system must reload CR3 to cause the TLB to be flushed"
*
* As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
- * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
+ * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
* to be modified.
*/
if (c->x86 == 5 && c->x86_model == 9) {
@@ -603,6 +603,7 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
}
static void split_lock_init(void);
+static void bus_lock_init(void);
static void init_intel(struct cpuinfo_x86 *c)
{
@@ -720,6 +721,7 @@ static void init_intel(struct cpuinfo_x86 *c)
tsx_disable();
split_lock_init();
+ bus_lock_init();
intel_init_thermal(c);
}
@@ -1020,16 +1022,15 @@ static bool split_lock_verify_msr(bool on)
return ctrl == tmp;
}
-static void __init split_lock_setup(void)
+static void __init sld_state_setup(void)
{
enum split_lock_detect_state state = sld_warn;
char arg[20];
int i, ret;
- if (!split_lock_verify_msr(false)) {
- pr_info("MSR access failed: Disabled\n");
+ if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+ !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
return;
- }
ret = cmdline_find_option(boot_command_line, "split_lock_detect",
arg, sizeof(arg));
@@ -1041,17 +1042,14 @@ static void __init split_lock_setup(void)
}
}
}
+ sld_state = state;
+}
- switch (state) {
- case sld_off:
- pr_info("disabled\n");
+static void __init __split_lock_setup(void)
+{
+ if (!split_lock_verify_msr(false)) {
+ pr_info("MSR access failed: Disabled\n");
return;
- case sld_warn:
- pr_info("warning about user-space split_locks\n");
- break;
- case sld_fatal:
- pr_info("sending SIGBUS on user-space split_locks\n");
- break;
}
rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
@@ -1061,7 +1059,9 @@ static void __init split_lock_setup(void)
return;
}
- sld_state = state;
+ /* Restore the MSR to its cached value. */
+ wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);
+
setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}
@@ -1118,6 +1118,29 @@ bool handle_guest_split_lock(unsigned long ip)
}
EXPORT_SYMBOL_GPL(handle_guest_split_lock);
+static void bus_lock_init(void)
+{
+ u64 val;
+
+ /*
+ * Warn and fatal are handled by #AC for split lock if #AC for
+ * split lock is supported.
+ */
+ if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) ||
+ (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) &&
+ (sld_state == sld_warn || sld_state == sld_fatal)) ||
+ sld_state == sld_off)
+ return;
+
+ /*
+ * Enable #DB for bus lock. All bus locks are handled in #DB except
+ * split locks are handled in #AC in the fatal case.
+ */
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, val);
+ val |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+}
+
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
@@ -1126,6 +1149,21 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code)
return true;
}
+void handle_bus_lock(struct pt_regs *regs)
+{
+ switch (sld_state) {
+ case sld_off:
+ break;
+ case sld_warn:
+ pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
+ current->comm, current->pid, regs->ip);
+ break;
+ case sld_fatal:
+ force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+ break;
+ }
+}
+
/*
* This function is called only when switching between tasks with
* different split-lock detection modes. It sets the MSR for the
@@ -1166,7 +1204,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
{}
};
-void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
+static void __init split_lock_setup(struct cpuinfo_x86 *c)
{
const struct x86_cpu_id *m;
u64 ia32_core_caps;
@@ -1193,5 +1231,40 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
}
cpu_model_supports_sld = true;
- split_lock_setup();
+ __split_lock_setup();
+}
+
+static void sld_state_show(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
+ !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+ return;
+
+ switch (sld_state) {
+ case sld_off:
+ pr_info("disabled\n");
+ break;
+ case sld_warn:
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+ pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
+ else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+ pr_info("#DB: warning on user-space bus_locks\n");
+ break;
+ case sld_fatal:
+ if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
+ pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n");
+ } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
+ pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n",
+ boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ?
+ " from non-WB" : "");
+ }
+ break;
+ }
+}
+
+void __init sld_setup(struct cpuinfo_x86 *c)
+{
+ split_lock_setup(c);
+ sld_state_setup();
+ sld_state_show();
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7962355436da..bf7fe87a7e88 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -529,7 +529,7 @@ static void mce_irq_work_cb(struct irq_work *entry)
* Check if the address reported by the CPU is in a format we can parse.
* It would be possible to add code for most other cases, but all would
* be somewhat complicated (e.g. segment offset would require an instruction
- * parser). So only support physical addresses up to page granuality for now.
+ * parser). So only support physical addresses up to page granularity for now.
*/
int mce_usable_address(struct mce *m)
{
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 7b360731fc2d..4e86d97f9653 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -74,6 +74,7 @@ MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
MCE_INJECT_SET(synd);
+MCE_INJECT_SET(ipid);
#define MCE_INJECT_GET(reg) \
static int inj_##reg##_get(void *data, u64 *val) \
@@ -88,11 +89,13 @@ MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
MCE_INJECT_GET(synd);
+MCE_INJECT_GET(ipid);
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
static void setup_inj_struct(struct mce *m)
{
@@ -629,6 +632,8 @@ static const char readme_msg[] =
"\t is present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
+"\n"
+"ipid:\t IPID (AMD-specific)\n"
"\n";
static ssize_t
@@ -652,6 +657,7 @@ static struct dfs_node {
{ .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
+ { .name = "ipid", .fops = &ipid_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
{ .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 83df991314c5..55ffa84d30d6 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -142,7 +142,7 @@ static struct severity {
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
),
MCESEV(
- KEEP, "Non signalled machine check",
+ KEEP, "Non signaled machine check",
SER, BITCLR(MCI_STATUS_S)
),
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b935e1b5f115..6a6318e9590c 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -629,16 +629,16 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;
- tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
- if (tmp_ret != UCODE_NEW)
- return size;
-
get_online_cpus();
ret = check_online_cpus();
if (ret)
goto put;
+ tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+ if (tmp_ret != UCODE_NEW)
+ goto put;
+
mutex_lock(&microcode_mutex);
ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e88bc296afca..415bc05d3dc7 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -197,7 +197,7 @@ static unsigned char hv_get_nmi_reason(void)
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Prior to WS2016 Debug-VM sends NMIs to all CPUs which makes
- * it dificult to process CHANNELMSG_UNLOAD in case of crash. Handle
+ * it difficult to process CHANNELMSG_UNLOAD in case of crash. Handle
* unknown NMI on the first CPU which gets it.
*/
static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
@@ -428,7 +428,7 @@ static void __init ms_hyperv_init_platform(void)
/*
* Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
- * set x2apic destination mode to physcial mode when x2apic is available
+ * set x2apic destination mode to physical mode when x2apic is available
* and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
* have 8-bit APIC id.
*/
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 9231640782fa..0c3b372318b7 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -434,7 +434,7 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
state->range_sizek = sizek - second_sizek;
}
-/* Mininum size of mtrr block that can take hole: */
+/* Minimum size of mtrr block that can take hole: */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);
static int __init parse_mtrr_chunk_size_opt(char *p)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 28c8a23aa42e..a76694bffe86 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -799,7 +799,7 @@ void mtrr_ap_init(void)
*
* This routine is called in two cases:
*
- * 1. very earily time of software resume, when there absolutely
+ * 1. very early time of software resume, when there absolutely
* isn't mtrr entry changes;
*
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 698bb26aeb6e..23001ae03e82 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -192,7 +192,7 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
* Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
* Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
*
- * Probe by trying to write the first of the L3 cach mask registers
+ * Probe by trying to write the first of the L3 cache mask registers
* and checking that the bits stick. Max CLOSids is always 4 and max cbm length
* is always 20 on hsw server parts. The minimum cache bitmask length
* allowed for HSW server is always 2 bits. Hardcode all of them.
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 7ac31210e452..dbeaa8409313 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -387,7 +387,7 @@ void mon_event_count(void *info)
* adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
* that:
*
- * current bandwdith(cur_bw) < user specified bandwidth(user_bw)
+ * current bandwidth(cur_bw) < user specified bandwidth(user_bw)
*
* This uses the MBM counters to measure the bandwidth and MBA throttle
* MSRs to control the bandwidth for a particular rdtgrp. It builds on the
@@ -397,7 +397,7 @@ void mon_event_count(void *info)
* timer. Having 1s interval makes the calculation of bandwidth simpler.
*
* Although MBA's goal is to restrict the bandwidth to a maximum, there may
- * be a need to increase the bandwidth to avoid uncecessarily restricting
+ * be a need to increase the bandwidth to avoid unnecessarily restricting
* the L2 <-> L3 traffic.
*
* Since MBA controls the L2 external bandwidth where as MBM measures the
@@ -480,7 +480,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
/*
* Delta values are updated dynamically package wise for each
- * rdtgrp everytime the throttle MSR changes value.
+ * rdtgrp every time the throttle MSR changes value.
*
* This is because (1)the increase in bandwidth is not perfectly
* linear and only "approximately" linear even when the hardware
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index e916646adc69..935af2ac6b1a 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1307,7 +1307,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
* If the thread does not get on the CPU for whatever
* reason and the process which sets up the region is
* interrupted then this will leave the thread in runnable
- * state and once it gets on the CPU it will derefence
+ * state and once it gets on the CPU it will dereference
* the cleared, but not freed, plr struct resulting in an
* empty pseudo-locking loop.
*/
@@ -1391,7 +1391,7 @@ out:
* group is removed from user space via a "rmdir" from userspace or the
* unmount of the resctrl filesystem. On removal the resource group does
* not go back to pseudo-locksetup mode before it is removed, instead it is
- * removed directly. There is thus assymmetry with the creation where the
+ * removed directly. There is thus asymmetry with the creation where the
* &struct pseudo_lock_region is removed here while it was not created in
* rdtgroup_pseudo_lock_create().
*
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index f9190adc52cb..01fd30e7829d 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * User interface for Resource Alloction in Resource Director Technology(RDT)
+ * User interface for Resource Allocation in Resource Director Technology(RDT)
*
* Copyright (C) 2016 Intel Corporation
*
@@ -294,7 +294,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
/*
* This is safe against resctrl_sched_in() called from __switch_to()
* because __switch_to() is executed with interrupts disabled. A local call
- * from update_closid_rmid() is proteced against __switch_to() because
+ * from update_closid_rmid() is protected against __switch_to() because
* preemption is disabled.
*/
static void update_cpu_closid_rmid(void *info)
@@ -2555,7 +2555,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
/*
* This creates a directory mon_data which contains the monitored data.
*
- * mon_data has one directory for each domain whic are named
+ * mon_data has one directory for each domain which are named
* in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
* with L3 domain looks as below:
* ./mon_data:
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 972ec3bfa9c0..21d1f062895a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -36,6 +36,8 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
{ X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
{ X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
+ { X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
+ { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 91d3dc784a29..9c1656779b2a 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -3,3 +3,4 @@ obj-y += \
encl.o \
ioctl.o \
main.o
+obj-$(CONFIG_X86_SGX_KVM) += virt.o
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 8ce6d8371cfb..aa9b8b868867 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -136,10 +136,6 @@ static const struct file_operations sgx_encl_fops = {
.get_unmapped_area = sgx_get_unmapped_area,
};
-const struct file_operations sgx_provision_fops = {
- .owner = THIS_MODULE,
-};
-
static struct miscdevice sgx_dev_enclave = {
.minor = MISC_DYNAMIC_MINOR,
.name = "sgx_enclave",
@@ -147,13 +143,6 @@ static struct miscdevice sgx_dev_enclave = {
.fops = &sgx_encl_fops,
};
-static struct miscdevice sgx_dev_provision = {
- .minor = MISC_DYNAMIC_MINOR,
- .name = "sgx_provision",
- .nodename = "sgx_provision",
- .fops = &sgx_provision_fops,
-};
-
int __init sgx_drv_init(void)
{
unsigned int eax, ebx, ecx, edx;
@@ -187,11 +176,5 @@ int __init sgx_drv_init(void)
if (ret)
return ret;
- ret = misc_register(&sgx_dev_provision);
- if (ret) {
- misc_deregister(&sgx_dev_enclave);
- return ret;
- }
-
return 0;
}
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 7449ef33f081..3be203297988 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -7,7 +7,7 @@
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
-#include "arch.h"
+#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"
@@ -78,7 +78,7 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
if (ret) {
- sgx_free_epc_page(epc_page);
+ sgx_encl_free_epc_page(epc_page);
return ERR_PTR(ret);
}
@@ -404,7 +404,7 @@ void sgx_encl_release(struct kref *ref)
if (sgx_unmark_page_reclaimable(entry->epc_page))
continue;
- sgx_free_epc_page(entry->epc_page);
+ sgx_encl_free_epc_page(entry->epc_page);
encl->secs_child_cnt--;
entry->epc_page = NULL;
}
@@ -415,7 +415,7 @@ void sgx_encl_release(struct kref *ref)
xa_destroy(&encl->page_array);
if (!encl->secs_child_cnt && encl->secs.epc_page) {
- sgx_free_epc_page(encl->secs.epc_page);
+ sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
}
@@ -423,7 +423,7 @@ void sgx_encl_release(struct kref *ref)
va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
list);
list_del(&va_page->list);
- sgx_free_epc_page(va_page->epc_page);
+ sgx_encl_free_epc_page(va_page->epc_page);
kfree(va_page);
}
@@ -686,7 +686,7 @@ struct sgx_epc_page *sgx_alloc_va_page(void)
ret = __epa(sgx_get_epc_virt_addr(epc_page));
if (ret) {
WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
- sgx_free_epc_page(epc_page);
+ sgx_encl_free_epc_page(epc_page);
return ERR_PTR(-EFAULT);
}
@@ -735,3 +735,24 @@ bool sgx_va_page_full(struct sgx_va_page *va_page)
return slot == SGX_VA_SLOT_COUNT;
}
+
+/**
+ * sgx_encl_free_epc_page - free an EPC page assigned to an enclave
+ * @page: EPC page to be freed
+ *
+ * Free an EPC page assigned to an enclave. It does EREMOVE for the page, and
+ * only upon success, it puts the page back to free page list. Otherwise, it
+ * gives a WARNING to indicate page is leaked.
+ */
+void sgx_encl_free_epc_page(struct sgx_epc_page *page)
+{
+ int ret;
+
+ WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+
+ ret = __eremove(sgx_get_epc_virt_addr(page));
+ if (WARN_ONCE(ret, EREMOVE_ERROR_MESSAGE, ret, ret))
+ return;
+
+ sgx_free_epc_page(page);
+}
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index d8d30ccbef4c..6e74f85b6264 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -115,5 +115,6 @@ struct sgx_epc_page *sgx_alloc_va_page(void);
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
bool sgx_va_page_full(struct sgx_va_page *va_page);
+void sgx_encl_free_epc_page(struct sgx_epc_page *page);
#endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h
index 443188fe7e70..9b204843b78d 100644
--- a/arch/x86/kernel/cpu/sgx/encls.h
+++ b/arch/x86/kernel/cpu/sgx/encls.h
@@ -11,21 +11,6 @@
#include <asm/traps.h>
#include "sgx.h"
-enum sgx_encls_function {
- ECREATE = 0x00,
- EADD = 0x01,
- EINIT = 0x02,
- EREMOVE = 0x03,
- EDGBRD = 0x04,
- EDGBWR = 0x05,
- EEXTEND = 0x06,
- ELDU = 0x08,
- EBLOCK = 0x09,
- EPA = 0x0A,
- EWB = 0x0B,
- ETRACK = 0x0C,
-};
-
/**
* ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
*
@@ -55,6 +40,19 @@ enum sgx_encls_function {
} while (0); \
}
+/*
+ * encls_faulted() - Check if an ENCLS leaf faulted given an error code
+ * @ret: the return value of an ENCLS leaf function call
+ *
+ * Return:
+ * - true: ENCLS leaf faulted.
+ * - false: Otherwise.
+ */
+static inline bool encls_faulted(int ret)
+{
+ return ret & ENCLS_FAULT_FLAG;
+}
+
/**
* encls_failed() - Check if an ENCLS function failed
* @ret: the return value of an ENCLS function call
@@ -65,7 +63,7 @@ enum sgx_encls_function {
*/
static inline bool encls_failed(int ret)
{
- if (ret & ENCLS_FAULT_FLAG)
+ if (encls_faulted(ret))
return ENCLS_TRAPNR(ret) != X86_TRAP_PF;
return !!ret;
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 90a5caf76939..83df20e3e633 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -2,6 +2,7 @@
/* Copyright(c) 2016-20 Intel Corporation. */
#include <asm/mman.h>
+#include <asm/sgx.h>
#include <linux/mman.h>
#include <linux/delay.h>
#include <linux/file.h>
@@ -47,7 +48,7 @@ static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
encl->page_cnt--;
if (va_page) {
- sgx_free_epc_page(va_page->epc_page);
+ sgx_encl_free_epc_page(va_page->epc_page);
list_del(&va_page->list);
kfree(va_page);
}
@@ -117,7 +118,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
return 0;
err_out:
- sgx_free_epc_page(encl->secs.epc_page);
+ sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
err_out_backing:
@@ -365,7 +366,7 @@ err_out_unlock:
mmap_read_unlock(current->mm);
err_out_free:
- sgx_free_epc_page(epc_page);
+ sgx_encl_free_epc_page(epc_page);
kfree(encl_page);
return ret;
@@ -495,7 +496,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
void *token)
{
u64 mrsigner[4];
- int i, j, k;
+ int i, j;
void *addr;
int ret;
@@ -544,8 +545,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
preempt_disable();
- for (k = 0; k < 4; k++)
- wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + k, mrsigner[k]);
+ sgx_update_lepubkeyhash(mrsigner);
ret = __einit(sigstruct, token, addr);
@@ -568,7 +568,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
}
}
- if (ret & ENCLS_FAULT_FLAG) {
+ if (encls_faulted(ret)) {
if (encls_failed(ret))
ENCLS_WARN(ret, "EINIT");
@@ -604,7 +604,6 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
{
struct sgx_sigstruct *sigstruct;
struct sgx_enclave_init init_arg;
- struct page *initp_page;
void *token;
int ret;
@@ -615,11 +614,15 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
if (copy_from_user(&init_arg, arg, sizeof(init_arg)))
return -EFAULT;
- initp_page = alloc_page(GFP_KERNEL);
- if (!initp_page)
+ /*
+ * 'sigstruct' must be on a page boundary and 'token' on a 512 byte
+ * boundary. kmalloc() will give this alignment when allocating
+ * PAGE_SIZE bytes.
+ */
+ sigstruct = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!sigstruct)
return -ENOMEM;
- sigstruct = kmap(initp_page);
token = (void *)((unsigned long)sigstruct + PAGE_SIZE / 2);
memset(token, 0, SGX_LAUNCH_TOKEN_SIZE);
@@ -645,8 +648,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
ret = sgx_encl_init(encl, sigstruct, token);
out:
- kunmap(initp_page);
- __free_page(initp_page);
+ kfree(sigstruct);
return ret;
}
@@ -665,24 +667,11 @@ out:
static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
{
struct sgx_enclave_provision params;
- struct file *file;
if (copy_from_user(&params, arg, sizeof(params)))
return -EFAULT;
- file = fget(params.fd);
- if (!file)
- return -EINVAL;
-
- if (file->f_op != &sgx_provision_fops) {
- fput(file);
- return -EINVAL;
- }
-
- encl->attributes_mask |= SGX_ATTR_PROVISIONKEY;
-
- fput(file);
- return 0;
+ return sgx_set_attribute(&encl->attributes_mask, params.fd);
}
long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 8df81a3ed945..63d3de02bbcc 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -1,14 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */
+#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
+#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
+#include <asm/sgx.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"
@@ -23,42 +26,58 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
* with sgx_reclaimer_lock acquired.
*/
static LIST_HEAD(sgx_active_page_list);
-
static DEFINE_SPINLOCK(sgx_reclaimer_lock);
+/* Number of free EPC pages; updated with a node's free page list lock held. */
+static unsigned long sgx_nr_free_pages;
+
+/* Nodes with one or more EPC sections. */
+static nodemask_t sgx_numa_mask;
+
+/*
+ * Array with one sgx_numa_node for each possible NUMA node. Each
+ * entry holds the list of free EPC pages that are on that
+ * node.
+ */
+static struct sgx_numa_node *sgx_numa_nodes;
+
+static LIST_HEAD(sgx_dirty_page_list);
+
/*
- * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS
- * pages whose child pages blocked EREMOVE.
+ * Reset post-kexec EPC pages to the uninitialized state. The pages are removed
+ * from the input list, and made available for the page allocator. SECS pages
+ * preceding their children in the input list are left intact.
*/
-static void sgx_sanitize_section(struct sgx_epc_section *section)
+static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
{
struct sgx_epc_page *page;
LIST_HEAD(dirty);
int ret;
- /* init_laundry_list is thread-local, no need for a lock: */
- while (!list_empty(&section->init_laundry_list)) {
+ /* dirty_page_list is thread-local, no need for a lock: */
+ while (!list_empty(dirty_page_list)) {
if (kthread_should_stop())
return;
- /* needed for access to ->page_list: */
- spin_lock(&section->lock);
-
- page = list_first_entry(&section->init_laundry_list,
- struct sgx_epc_page, list);
+ page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);
ret = __eremove(sgx_get_epc_virt_addr(page));
- if (!ret)
- list_move(&page->list, &section->page_list);
- else
+ if (!ret) {
+ /*
+ * page is now sanitized. Make it available via the SGX
+ * page allocator:
+ */
+ list_del(&page->list);
+ sgx_free_epc_page(page);
+ } else {
+ /* The page is not yet clean - move to the dirty list. */
list_move_tail(&page->list, &dirty);
-
- spin_unlock(&section->lock);
+ }
cond_resched();
}
- list_splice(&dirty, &section->init_laundry_list);
+ list_splice(&dirty, dirty_page_list);
}
static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
@@ -195,10 +214,10 @@ static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl)
/*
* Swap page to the regular memory transformed to the blocked state by using
- * EBLOCK, which means that it can no loger be referenced (no new TLB entries).
+ * EBLOCK, which means that it can no longer be referenced (no new TLB entries).
*
* The first trial just tries to write the page assuming that some other thread
- * has reset the count for threads inside the enlave by using ETRACK, and
+ * has reset the count for threads inside the enclave by using ETRACK, and
* previous thread count has been zeroed out. The second trial calls ETRACK
* before EWB. If that fails we kick all the HW threads out, and then do EWB,
* which should be guaranteed the succeed.
@@ -278,7 +297,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
- sgx_free_epc_page(encl->secs.epc_page);
+ sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
sgx_encl_put_backing(&secs_backing, true);
@@ -308,6 +327,7 @@ static void sgx_reclaim_pages(void)
struct sgx_epc_section *section;
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
+ struct sgx_numa_node *node;
pgoff_t page_index;
int cnt = 0;
int ret;
@@ -379,50 +399,33 @@ skip:
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
section = &sgx_epc_sections[epc_page->section];
- spin_lock(&section->lock);
- list_add_tail(&epc_page->list, &section->page_list);
- section->free_cnt++;
- spin_unlock(&section->lock);
- }
-}
-
-static unsigned long sgx_nr_free_pages(void)
-{
- unsigned long cnt = 0;
- int i;
-
- for (i = 0; i < sgx_nr_epc_sections; i++)
- cnt += sgx_epc_sections[i].free_cnt;
+ node = section->node;
- return cnt;
+ spin_lock(&node->lock);
+ list_add_tail(&epc_page->list, &node->free_page_list);
+ sgx_nr_free_pages++;
+ spin_unlock(&node->lock);
+ }
}
static bool sgx_should_reclaim(unsigned long watermark)
{
- return sgx_nr_free_pages() < watermark &&
- !list_empty(&sgx_active_page_list);
+ return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
}
static int ksgxd(void *p)
{
- int i;
-
set_freezable();
/*
* Sanitize pages in order to recover from kexec(). The 2nd pass is
* required for SECS pages, whose child pages blocked EREMOVE.
*/
- for (i = 0; i < sgx_nr_epc_sections; i++)
- sgx_sanitize_section(&sgx_epc_sections[i]);
-
- for (i = 0; i < sgx_nr_epc_sections; i++) {
- sgx_sanitize_section(&sgx_epc_sections[i]);
+ __sgx_sanitize_pages(&sgx_dirty_page_list);
+ __sgx_sanitize_pages(&sgx_dirty_page_list);
- /* Should never happen. */
- if (!list_empty(&sgx_epc_sections[i].init_laundry_list))
- WARN(1, "EPC section %d has unsanitized pages.\n", i);
- }
+ /* sanity check: */
+ WARN_ON(!list_empty(&sgx_dirty_page_list));
while (!kthread_should_stop()) {
if (try_to_freeze())
@@ -454,45 +457,56 @@ static bool __init sgx_page_reclaimer_init(void)
return true;
}
-static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_section *section)
+static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
{
- struct sgx_epc_page *page;
+ struct sgx_numa_node *node = &sgx_numa_nodes[nid];
+ struct sgx_epc_page *page = NULL;
- spin_lock(&section->lock);
+ spin_lock(&node->lock);
- if (list_empty(&section->page_list)) {
- spin_unlock(&section->lock);
+ if (list_empty(&node->free_page_list)) {
+ spin_unlock(&node->lock);
return NULL;
}
- page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
+ page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
list_del_init(&page->list);
- section->free_cnt--;
+ sgx_nr_free_pages--;
+
+ spin_unlock(&node->lock);
- spin_unlock(&section->lock);
return page;
}
/**
* __sgx_alloc_epc_page() - Allocate an EPC page
*
- * Iterate through EPC sections and borrow a free EPC page to the caller. When a
- * page is no longer needed it must be released with sgx_free_epc_page().
+ * Iterate through NUMA nodes and reserve a free EPC page for the caller. Start
+ * from the NUMA node, where the caller is executing.
*
* Return:
- * an EPC page,
- * -errno on error
+ * - an EPC page: A free EPC page was available.
+ * - NULL: Out of EPC pages.
*/
struct sgx_epc_page *__sgx_alloc_epc_page(void)
{
- struct sgx_epc_section *section;
struct sgx_epc_page *page;
- int i;
+ int nid_of_current = numa_node_id();
+ int nid = nid_of_current;
- for (i = 0; i < sgx_nr_epc_sections; i++) {
- section = &sgx_epc_sections[i];
+ if (node_isset(nid_of_current, sgx_numa_mask)) {
+ page = __sgx_alloc_epc_page_from_node(nid_of_current);
+ if (page)
+ return page;
+ }
+
+ /* Fall back to the non-local NUMA nodes: */
+ while (true) {
+ nid = next_node_in(nid, sgx_numa_mask);
+ if (nid == nid_of_current)
+ break;
- page = __sgx_alloc_epc_page_from_section(section);
+ page = __sgx_alloc_epc_page_from_node(nid);
if (page)
return page;
}
@@ -598,23 +612,22 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
* sgx_free_epc_page() - Free an EPC page
* @page: an EPC page
*
- * Call EREMOVE for an EPC page and insert it back to the list of free pages.
+ * Put the EPC page back to the list of free pages. It's the caller's
+ * responsibility to make sure that the page is in uninitialized state. In other
+ * words, do EREMOVE, EWB or whatever operation is necessary before calling
+ * this function.
*/
void sgx_free_epc_page(struct sgx_epc_page *page)
{
struct sgx_epc_section *section = &sgx_epc_sections[page->section];
- int ret;
+ struct sgx_numa_node *node = section->node;
- WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+ spin_lock(&node->lock);
- ret = __eremove(sgx_get_epc_virt_addr(page));
- if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret))
- return;
+ list_add_tail(&page->list, &node->free_page_list);
+ sgx_nr_free_pages++;
- spin_lock(&section->lock);
- list_add_tail(&page->list, &section->page_list);
- section->free_cnt++;
- spin_unlock(&section->lock);
+ spin_unlock(&node->lock);
}
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
@@ -635,18 +648,14 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
}
section->phys_addr = phys_addr;
- spin_lock_init(&section->lock);
- INIT_LIST_HEAD(&section->page_list);
- INIT_LIST_HEAD(&section->init_laundry_list);
for (i = 0; i < nr_pages; i++) {
section->pages[i].section = index;
section->pages[i].flags = 0;
section->pages[i].owner = NULL;
- list_add_tail(&section->pages[i].list, &section->init_laundry_list);
+ list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
}
- section->free_cnt = nr_pages;
return true;
}
@@ -665,8 +674,13 @@ static bool __init sgx_page_cache_init(void)
{
u32 eax, ebx, ecx, edx, type;
u64 pa, size;
+ int nid;
int i;
+ sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
+ if (!sgx_numa_nodes)
+ return false;
+
for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);
@@ -689,6 +703,21 @@ static bool __init sgx_page_cache_init(void)
break;
}
+ nid = numa_map_to_online_node(phys_to_target_node(pa));
+ if (nid == NUMA_NO_NODE) {
+ /* The physical address is already printed above. */
+ pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
+ nid = 0;
+ }
+
+ if (!node_isset(nid, sgx_numa_mask)) {
+ spin_lock_init(&sgx_numa_nodes[nid].lock);
+ INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
+ node_set(nid, sgx_numa_mask);
+ }
+
+ sgx_epc_sections[i].node = &sgx_numa_nodes[nid];
+
sgx_nr_epc_sections++;
}
@@ -700,6 +729,67 @@ static bool __init sgx_page_cache_init(void)
return true;
}
+/*
+ * Update the SGX_LEPUBKEYHASH MSRs to the values specified by caller.
+ * The bare-metal driver must set them to the hash of the enclave's signer
+ * before EINIT. KVM must set them to the guest's virtual MSR values
+ * before doing EINIT from the guest.
+ */
+void sgx_update_lepubkeyhash(u64 *lepubkeyhash)
+{
+ int i;
+
+ WARN_ON_ONCE(preemptible());
+
+ for (i = 0; i < 4; i++)
+ wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]);
+}
+
+const struct file_operations sgx_provision_fops = {
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice sgx_dev_provision = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "sgx_provision",
+ .nodename = "sgx_provision",
+ .fops = &sgx_provision_fops,
+};
+
+/**
+ * sgx_set_attribute() - Update allowed attributes given file descriptor
+ * @allowed_attributes: Pointer to allowed enclave attributes
+ * @attribute_fd: File descriptor for specific attribute
+ *
+ * Append enclave attribute indicated by file descriptor to allowed
+ * attributes. Currently only SGX_ATTR_PROVISIONKEY indicated by
+ * /dev/sgx_provision is supported.
+ *
+ * Return:
+ * 0: SGX_ATTR_PROVISIONKEY is appended to allowed_attributes
+ * -EINVAL: Invalid, or not supported file descriptor
+ */
+int sgx_set_attribute(unsigned long *allowed_attributes,
+ unsigned int attribute_fd)
+{
+ struct file *file;
+
+ file = fget(attribute_fd);
+ if (!file)
+ return -EINVAL;
+
+ if (file->f_op != &sgx_provision_fops) {
+ fput(file);
+ return -EINVAL;
+ }
+
+ *allowed_attributes |= SGX_ATTR_PROVISIONKEY;
+
+ fput(file);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sgx_set_attribute);
+
static int __init sgx_init(void)
{
int ret;
@@ -716,12 +806,28 @@ static int __init sgx_init(void)
goto err_page_cache;
}
- ret = sgx_drv_init();
+ ret = misc_register(&sgx_dev_provision);
if (ret)
goto err_kthread;
+ /*
+ * Always try to initialize the native *and* KVM drivers.
+ * The KVM driver is less picky than the native one and
+ * can function if the native one is not supported on the
+ * current system or fails to initialize.
+ *
+ * Error out only if both fail to initialize.
+ */
+ ret = sgx_drv_init();
+
+ if (sgx_vepc_init() && ret)
+ goto err_provision;
+
return 0;
+err_provision:
+ misc_deregister(&sgx_dev_provision);
+
err_kthread:
kthread_stop(ksgxd_tsk);
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 5fa42d143feb..4628acec0009 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -8,11 +8,15 @@
#include <linux/rwsem.h>
#include <linux/types.h>
#include <asm/asm.h>
-#include "arch.h"
+#include <asm/sgx.h>
#undef pr_fmt
#define pr_fmt(fmt) "sgx: " fmt
+#define EREMOVE_ERROR_MESSAGE \
+ "EREMOVE returned %d (0x%x) and an EPC page was leaked. SGX may become unusable. " \
+ "Refer to Documentation/x86/sgx.rst for more information."
+
#define SGX_MAX_EPC_SECTIONS 8
#define SGX_EEXTEND_BLOCK_SIZE 256
#define SGX_NR_TO_SCAN 16
@@ -30,28 +34,25 @@ struct sgx_epc_page {
};
/*
+ * Contains the tracking data for NUMA nodes having EPC pages. Most importantly,
+ * the free page list local to the node is stored here.
+ */
+struct sgx_numa_node {
+ struct list_head free_page_list;
+ spinlock_t lock;
+};
+
+/*
* The firmware can define multiple chunks of EPC to the different areas of the
* physical memory e.g. for memory areas of the each node. This structure is
* used to store EPC pages for one EPC section and virtual memory area where
* the pages have been mapped.
- *
- * 'lock' must be held before accessing 'page_list' or 'free_cnt'.
*/
struct sgx_epc_section {
unsigned long phys_addr;
void *virt_addr;
struct sgx_epc_page *pages;
-
- spinlock_t lock;
- struct list_head page_list;
- unsigned long free_cnt;
-
- /*
- * Pages which need EREMOVE run on them before they can be
- * used. Only safe to be accessed in ksgxd and init code.
- * Not protected by locks.
- */
- struct list_head init_laundry_list;
+ struct sgx_numa_node *node;
};
extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
@@ -83,4 +84,15 @@ void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
+#ifdef CONFIG_X86_SGX_KVM
+int __init sgx_vepc_init(void);
+#else
+static inline int __init sgx_vepc_init(void)
+{
+ return -ENODEV;
+}
+#endif
+
+void sgx_update_lepubkeyhash(u64 *lepubkeyhash);
+
#endif /* _X86_SGX_H */
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
new file mode 100644
index 000000000000..6ad165a5c0cc
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device driver to expose SGX enclave memory to KVM guests.
+ *
+ * Copyright(c) 2021 Intel Corporation.
+ */
+
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <asm/sgx.h>
+#include <uapi/asm/sgx.h>
+
+#include "encls.h"
+#include "sgx.h"
+
+struct sgx_vepc {
+ struct xarray page_array;
+ struct mutex lock;
+};
+
+/*
+ * Temporary SECS pages that cannot be EREMOVE'd due to having child pages in
+ * other virtual EPC instances, and the lock that protects the list.
+ */
+static struct mutex zombie_secs_pages_lock;
+static struct list_head zombie_secs_pages;
+
+static int __sgx_vepc_fault(struct sgx_vepc *vepc,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ struct sgx_epc_page *epc_page;
+ unsigned long index, pfn;
+ int ret;
+
+ WARN_ON(!mutex_is_locked(&vepc->lock));
+
+ /* Calculate index of EPC page in virtual EPC's page_array */
+ index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);
+
+ epc_page = xa_load(&vepc->page_array, index);
+ if (epc_page)
+ return 0;
+
+ epc_page = sgx_alloc_epc_page(vepc, false);
+ if (IS_ERR(epc_page))
+ return PTR_ERR(epc_page);
+
+ ret = xa_err(xa_store(&vepc->page_array, index, epc_page, GFP_KERNEL));
+ if (ret)
+ goto err_free;
+
+ pfn = PFN_DOWN(sgx_get_epc_phys_addr(epc_page));
+
+ ret = vmf_insert_pfn(vma, addr, pfn);
+ if (ret != VM_FAULT_NOPAGE) {
+ ret = -EFAULT;
+ goto err_delete;
+ }
+
+ return 0;
+
+err_delete:
+ xa_erase(&vepc->page_array, index);
+err_free:
+ sgx_free_epc_page(epc_page);
+ return ret;
+}
+
+static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct sgx_vepc *vepc = vma->vm_private_data;
+ int ret;
+
+ mutex_lock(&vepc->lock);
+ ret = __sgx_vepc_fault(vepc, vma, vmf->address);
+ mutex_unlock(&vepc->lock);
+
+ if (!ret)
+ return VM_FAULT_NOPAGE;
+
+ if (ret == -EBUSY && (vmf->flags & FAULT_FLAG_ALLOW_RETRY)) {
+ mmap_read_unlock(vma->vm_mm);
+ return VM_FAULT_RETRY;
+ }
+
+ return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct sgx_vepc_vm_ops = {
+ .fault = sgx_vepc_fault,
+};
+
+static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct sgx_vepc *vepc = file->private_data;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ vma->vm_ops = &sgx_vepc_vm_ops;
+ /* Don't copy VMA in fork() */
+ vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP | VM_DONTCOPY;
+ vma->vm_private_data = vepc;
+
+ return 0;
+}
+
+static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
+{
+ int ret;
+
+ /*
+ * Take a previously guest-owned EPC page and return it to the
+ * general EPC page pool.
+ *
+ * Guests can not be trusted to have left this page in a good
+ * state, so run EREMOVE on the page unconditionally. In the
+ * case that a guest properly EREMOVE'd this page, a superfluous
+ * EREMOVE is harmless.
+ */
+ ret = __eremove(sgx_get_epc_virt_addr(epc_page));
+ if (ret) {
+ /*
+ * Only SGX_CHILD_PRESENT is expected, which is because of
+ * EREMOVE'ing an SECS still with child, in which case it can
+ * be handled by EREMOVE'ing the SECS again after all pages in
+ * virtual EPC have been EREMOVE'd. See the comments below in
+ * sgx_vepc_release().
+ *
+ * The user of virtual EPC (KVM) needs to guarantee that no
+ * logical processor is still running in the enclave in the guest,
+ * otherwise EREMOVE will get SGX_ENCLAVE_ACT which cannot be
+ * handled here.
+ */
+ WARN_ONCE(ret != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE,
+ ret, ret);
+ return ret;
+ }
+
+ sgx_free_epc_page(epc_page);
+
+ return 0;
+}
+
+static int sgx_vepc_release(struct inode *inode, struct file *file)
+{
+ struct sgx_vepc *vepc = file->private_data;
+ struct sgx_epc_page *epc_page, *tmp, *entry;
+ unsigned long index;
+
+ LIST_HEAD(secs_pages);
+
+ xa_for_each(&vepc->page_array, index, entry) {
+ /*
+ * Remove all normal, child pages. sgx_vepc_free_page()
+ * will fail if EREMOVE fails, but this is OK and expected on
+ * SECS pages. Those can only be EREMOVE'd *after* all their
+ * child pages. Retries below will clean them up.
+ */
+ if (sgx_vepc_free_page(entry))
+ continue;
+
+ xa_erase(&vepc->page_array, index);
+ }
+
+ /*
+ * Retry EREMOVE'ing pages. This will clean up any SECS pages that
+ * only had children in this 'epc' area.
+ */
+ xa_for_each(&vepc->page_array, index, entry) {
+ epc_page = entry;
+ /*
+ * An EREMOVE failure here means that the SECS page still
+ * has children. But, since all children in this 'sgx_vepc'
+ * have been removed, the SECS page must have a child on
+ * another instance.
+ */
+ if (sgx_vepc_free_page(epc_page))
+ list_add_tail(&epc_page->list, &secs_pages);
+
+ xa_erase(&vepc->page_array, index);
+ }
+
+ /*
+ * SECS pages are "pinned" by child pages, and "unpinned" once all
+ * children have been EREMOVE'd. A child page in this instance
+ * may have pinned an SECS page encountered in an earlier release(),
+ * creating a zombie. Since some children were EREMOVE'd above,
+ * try to EREMOVE all zombies in the hopes that one was unpinned.
+ */
+ mutex_lock(&zombie_secs_pages_lock);
+ list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
+ /*
+ * Speculatively remove the page from the list of zombies,
+ * if the page is successfully EREMOVE'd it will be added to
+ * the list of free pages. If EREMOVE fails, throw the page
+ * on the local list, which will be spliced on at the end.
+ */
+ list_del(&epc_page->list);
+
+ if (sgx_vepc_free_page(epc_page))
+ list_add_tail(&epc_page->list, &secs_pages);
+ }
+
+ if (!list_empty(&secs_pages))
+ list_splice_tail(&secs_pages, &zombie_secs_pages);
+ mutex_unlock(&zombie_secs_pages_lock);
+
+ kfree(vepc);
+
+ return 0;
+}
+
+static int sgx_vepc_open(struct inode *inode, struct file *file)
+{
+ struct sgx_vepc *vepc;
+
+ vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
+ if (!vepc)
+ return -ENOMEM;
+ mutex_init(&vepc->lock);
+ xa_init(&vepc->page_array);
+
+ file->private_data = vepc;
+
+ return 0;
+}
+
+static const struct file_operations sgx_vepc_fops = {
+ .owner = THIS_MODULE,
+ .open = sgx_vepc_open,
+ .release = sgx_vepc_release,
+ .mmap = sgx_vepc_mmap,
+};
+
+static struct miscdevice sgx_vepc_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "sgx_vepc",
+ .nodename = "sgx_vepc",
+ .fops = &sgx_vepc_fops,
+};
+
+int __init sgx_vepc_init(void)
+{
+ /* SGX virtualization requires KVM to work */
+ if (!cpu_feature_enabled(X86_FEATURE_VMX))
+ return -ENODEV;
+
+ INIT_LIST_HEAD(&zombie_secs_pages);
+ mutex_init(&zombie_secs_pages_lock);
+
+ return misc_register(&sgx_vepc_dev);
+}
+
+/**
+ * sgx_virt_ecreate() - Run ECREATE on behalf of guest
+ * @pageinfo: Pointer to PAGEINFO structure
+ * @secs: Userspace pointer to SECS page
+ * @trapnr: trap number injected to guest in case of ECREATE error
+ *
+ * Run ECREATE on behalf of guest after KVM traps ECREATE for the purpose
+ * of enforcing policies of guest's enclaves, and return the trap number
+ * which should be injected to guest in case of any ECREATE error.
+ *
+ * Return:
+ * - 0: ECREATE was successful.
+ * - <0: on error.
+ */
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+ int *trapnr)
+{
+ int ret;
+
+ /*
+ * @secs is an untrusted, userspace-provided address. It comes from
+ * KVM and is assumed to be a valid pointer which points somewhere in
+ * userspace. This can fault and call SGX or other fault handlers when
+ * userspace mapping @secs doesn't exist.
+ *
+ * Add a WARN() to make sure @secs is already a valid userspace pointer
+ * from the caller (KVM), which should already have handled the invalid
+ * pointer case (for instance, one made by a malicious guest). All other
+ * checks, such as alignment of @secs, are deferred to ENCLS itself.
+ */
+ if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
+ return -EINVAL;
+
+ __uaccess_begin();
+ ret = __ecreate(pageinfo, (void *)secs);
+ __uaccess_end();
+
+ if (encls_faulted(ret)) {
+ *trapnr = ENCLS_TRAPNR(ret);
+ return -EFAULT;
+ }
+
+ /* ECREATE doesn't return an error code, it faults or succeeds. */
+ WARN_ON_ONCE(ret);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
+
+static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
+ void __user *secs)
+{
+ int ret;
+
+ /*
+ * Make sure all userspace pointers from caller (KVM) are valid.
+ * All other checks deferred to ENCLS itself. Also see comment
+ * for @secs in sgx_virt_ecreate().
+ */
+#define SGX_EINITTOKEN_SIZE 304
+ if (WARN_ON_ONCE(!access_ok(sigstruct, sizeof(struct sgx_sigstruct)) ||
+ !access_ok(token, SGX_EINITTOKEN_SIZE) ||
+ !access_ok(secs, PAGE_SIZE)))
+ return -EINVAL;
+
+ __uaccess_begin();
+ ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
+ __uaccess_end();
+
+ return ret;
+}
+
+/**
+ * sgx_virt_einit() - Run EINIT on behalf of guest
+ * @sigstruct: Userspace pointer to SIGSTRUCT structure
+ * @token: Userspace pointer to EINITTOKEN structure
+ * @secs: Userspace pointer to SECS page
+ * @lepubkeyhash: Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
+ * @trapnr: trap number injected to guest in case of EINIT error
+ *
+ * Run EINIT on behalf of guest after KVM traps EINIT. If SGX_LC is available
+ * in host, SGX driver may rewrite the hardware values at will, therefore KVM
+ * needs to update hardware values to guest's virtual MSR values in order to
+ * ensure EINIT is executed with expected hardware values.
+ *
+ * Return:
+ * - 0: EINIT was successful.
+ * - <0: on error.
+ */
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+ void __user *secs, u64 *lepubkeyhash, int *trapnr)
+{
+ int ret;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
+ ret = __sgx_virt_einit(sigstruct, token, secs);
+ } else {
+ preempt_disable();
+
+ sgx_update_lepubkeyhash(lepubkeyhash);
+
+ ret = __sgx_virt_einit(sigstruct, token, secs);
+ preempt_enable();
+ }
+
+ /* Propagate up the error from the WARN_ON_ONCE in __sgx_virt_einit() */
+ if (ret == -EINVAL)
+ return ret;
+
+ if (encls_faulted(ret)) {
+ *trapnr = ENCLS_TRAPNR(ret);
+ return -EFAULT;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sgx_virt_einit);
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 8678864ce712..132a2de44d2f 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -30,7 +30,7 @@ EXPORT_SYMBOL(__max_die_per_package);
#ifdef CONFIG_SMP
/*
- * Check if given CPUID extended toplogy "leaf" is implemented
+ * Check if given CPUID extended topology "leaf" is implemented
*/
static int check_extended_topology_leaf(int leaf)
{
@@ -44,7 +44,7 @@ static int check_extended_topology_leaf(int leaf)
return 0;
}
/*
- * Return best CPUID Extended Toplogy Leaf supported
+ * Return best CPUID Extended Topology Leaf supported
*/
static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
{
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index c6ede3b3d302..c04b933f48d3 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -27,6 +27,7 @@
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
+#include <linux/static_call.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
@@ -336,11 +337,11 @@ static void __init vmware_paravirt_ops_setup(void)
vmware_cyc2ns_setup();
if (vmw_sched_clock)
- pv_ops.time.sched_clock = vmware_sched_clock;
+ paravirt_set_sched_clock(vmware_sched_clock);
if (vmware_is_stealclock_available()) {
has_steal_clock = true;
- pv_ops.time.steal_clock = vmware_steal_clock;
+ static_call_update(pv_steal_clock, vmware_steal_clock);
/* We use reboot notifier only to disable steal clock */
register_reboot_notifier(&vmware_pv_reboot_nb);
@@ -378,6 +379,8 @@ static void __init vmware_set_capabilities(void)
{
setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+ if (vmware_tsc_khz)
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
setup_force_cpu_cap(X86_FEATURE_VMCALL);
else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index a8f3af257e26..b1deacbeb266 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -337,7 +337,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
struct crash_memmap_data cmd;
struct crash_mem *cmem;
- cmem = vzalloc(sizeof(struct crash_mem));
+ cmem = vzalloc(struct_size(cmem, ranges, 1));
if (!cmem)
return -ENOMEM;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 22aad412f965..f74cb7da9557 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -793,7 +793,7 @@ core_initcall(e820__register_nvs_regions);
#endif
/*
- * Allocate the requested number of bytes with the requsted alignment
+ * Allocate the requested number of bytes with the requested alignment
* and return (the physical address) to the caller. Also register this
* range in the 'kexec' E820 table as a reserved range.
*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 683749b80ae2..a85c64000218 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -253,7 +253,7 @@ static bool xfeature_enabled(enum xfeature xfeature)
static void __init setup_xstate_features(void)
{
u32 eax, ebx, ecx, edx, i;
- /* start at the beginnning of the "extended state" */
+ /* start at the beginning of the "extended state" */
unsigned int last_good_offset = offsetof(struct xregs_state,
extended_state_area);
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5e9beb77cafd..18be44163a50 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -104,7 +104,7 @@ static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
static bool __head check_la57_support(unsigned long physaddr)
{
/*
- * 5-level paging is detected and enabled at kernel decomression
+ * 5-level paging is detected and enabled at kernel decompression
* stage. Only check if it has been enabled there.
*/
if (!(native_read_cr4() & X86_CR4_LA57))
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index ee1a283f8e96..d552f177eca0 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -245,7 +245,7 @@ static const __initconst struct idt_data ist_idts[] = {
* after that.
*
* Note, that X86_64 cannot install the real #PF handler in
- * idt_setup_early_traps() because the memory intialization needs the #PF
+ * idt_setup_early_traps() because the memory initialization needs the #PF
* handler from the early_idt_handler_array to initialize the early page
* tables.
*/
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 58aa712973ac..e28f6a5d14f1 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -338,7 +338,7 @@ void fixup_irqs(void)
irq_migrate_all_off_this_cpu();
/*
- * We can remove mdelay() and then send spuriuous interrupts to
+ * We can remove mdelay() and then send spurious interrupts to
* new cpu targets for all the irqs that were handled previously by
* this cpu. While it works, I have seen spurious interrupt messages
* (nothing wrong but still...).
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ff7878df96b4..3a43a2dee658 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -17,7 +17,7 @@
* Updated by: Tom Rini <trini@kernel.crashing.org>
* Updated by: Jason Wessel <jason.wessel@windriver.com>
* Modified for 386 by Jim Kingdon, Cygnus Support.
- * Origianl kgdb, compatibility with 2.1.xx kernel by
+ * Original kgdb, compatibility with 2.1.xx kernel by
* David Grothe <dave@gcom.com>
* Integrated into 2.2.5 kernel by Tigran Aivazian <tigran@sco.com>
* X86_64 changes from Andi Kleen's patch merged by Jim Houston
@@ -642,7 +642,7 @@ void kgdb_arch_late(void)
struct perf_event **pevent;
/*
- * Pre-allocate the hw breakpoint structions in the non-atomic
+ * Pre-allocate the hw breakpoint instructions in the non-atomic
* portion of kgdb because this operation requires mutexs to
* complete.
*/
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 51c7f5271aee..596de2f6d3a5 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -12,7 +12,7 @@
#include "common.h"
-/* Ftrace callback handler for kprobes -- called under preepmt disabled */
+/* Ftrace callback handler for kprobes -- called under preempt disabled */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 78bb0fae3982..172c947240b9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -650,7 +650,7 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
has_steal_clock = 1;
- pv_ops.time.steal_clock = kvm_steal_clock;
+ static_call_update(pv_steal_clock, kvm_steal_clock);
}
if (pv_tlb_flush_supported()) {
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1fc0962c89c0..d37ed4e1d033 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -106,7 +106,7 @@ static inline void kvm_sched_clock_init(bool stable)
if (!stable)
clear_sched_clock_stable();
kvm_sched_clock_offset = kvm_clock_read();
- pv_ops.time.sched_clock = kvm_sched_clock_read;
+ paravirt_set_sched_clock(kvm_sched_clock_read);
pr_info("kvm-clock: using sched offset of %llu cycles",
kvm_sched_clock_offset);
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a29a44a98e5b..f01cd9a08155 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -260,7 +260,7 @@ static void set_idt(void *newidt, u16 limit)
{
struct desc_ptr curidt;
- /* x86-64 supports unaliged loads & stores */
+ /* x86-64 supports unaligned loads & stores */
curidt.size = limit;
curidt.address = (unsigned long)newidt;
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 4f75d0cf6305..9e1ea99ad9df 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -32,3 +32,12 @@ bool pv_is_native_vcpu_is_preempted(void)
return pv_ops.lock.vcpu_is_preempted.func ==
__raw_callee_save___native_vcpu_is_preempted;
}
+
+void __init paravirt_set_cap(void)
+{
+ if (!pv_is_native_spin_unlock())
+ setup_force_cpu_cap(X86_FEATURE_PVUNLOCK);
+
+ if (!pv_is_native_vcpu_is_preempted())
+ setup_force_cpu_cap(X86_FEATURE_VCPUPREEMPT);
+}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c60222ab8ab9..d0730264786b 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -14,6 +14,7 @@
#include <linux/highmem.h>
#include <linux/kprobes.h>
#include <linux/pgtable.h>
+#include <linux/static_call.h>
#include <asm/bug.h>
#include <asm/paravirt.h>
@@ -52,7 +53,10 @@ void __init default_banner(void)
}
/* Undefined instruction for dealing with missing ops pointers. */
-static const unsigned char ud2a[] = { 0x0f, 0x0b };
+static void paravirt_BUG(void)
+{
+ BUG();
+}
struct branch {
unsigned char opcode;
@@ -85,25 +89,6 @@ u64 notrace _paravirt_ident_64(u64 x)
{
return x;
}
-
-static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
- unsigned long addr, unsigned len)
-{
- struct branch *b = insn_buff;
- unsigned long delta = (unsigned long)target - (addr+5);
-
- if (len < 5) {
-#ifdef CONFIG_RETPOLINE
- WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
-#endif
- return len; /* call too long for patch site */
- }
-
- b->opcode = 0xe9; /* jmp */
- b->delta = delta;
-
- return 5;
-}
#endif
DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -114,8 +99,8 @@ void __init native_pv_lock_init(void)
static_branch_disable(&virt_spin_lock_key);
}
-unsigned paravirt_patch_default(u8 type, void *insn_buff,
- unsigned long addr, unsigned len)
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
+ unsigned int len)
{
/*
* Neat trick to map patch type back to the call within the
@@ -125,20 +110,10 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
unsigned ret;
if (opfunc == NULL)
- /* If there's no function, patch it with a ud2a (BUG) */
- ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
+ /* If there's no function, patch it with paravirt_BUG() */
+ ret = paravirt_patch_call(insn_buff, paravirt_BUG, addr, len);
else if (opfunc == _paravirt_nop)
ret = 0;
-
-#ifdef CONFIG_PARAVIRT_XXL
- /* identity functions just return their single argument */
- else if (opfunc == _paravirt_ident_64)
- ret = paravirt_patch_ident_64(insn_buff, len);
-
- else if (type == PARAVIRT_PATCH(cpu.iret))
- /* If operation requires a jmp, then jmp */
- ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
-#endif
else
/* Otherwise call the function. */
ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
@@ -146,19 +121,6 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff,
return ret;
}
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
- const char *start, const char *end)
-{
- unsigned insn_len = end - start;
-
- /* Alternative instruction is too large for the patch site and we cannot continue: */
- BUG_ON(insn_len > len || start == NULL);
-
- memcpy(insn_buff, start, insn_len);
-
- return insn_len;
-}
-
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
@@ -167,6 +129,14 @@ static u64 native_steal_clock(int cpu)
return 0;
}
+DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock);
+
+void paravirt_set_sched_clock(u64 (*func)(void))
+{
+ static_call_update(pv_sched_clock, func);
+}
+
/* These are in entry.S */
extern void native_iret(void);
@@ -269,13 +239,6 @@ struct pv_info pv_info = {
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
struct paravirt_patch_template pv_ops = {
- /* Init ops. */
- .init.patch = native_patch,
-
- /* Time ops. */
- .time.sched_clock = native_sched_clock,
- .time.steal_clock = native_steal_clock,
-
/* Cpu ops. */
.cpu.io_delay = native_io_delay,
@@ -308,8 +271,6 @@ struct paravirt_patch_template pv_ops = {
.cpu.load_sp0 = native_load_sp0,
- .cpu.iret = native_iret,
-
#ifdef CONFIG_X86_IOPL_IOPERM
.cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap,
.cpu.update_io_bitmap = native_tss_update_io_bitmap,
@@ -414,6 +375,8 @@ struct paravirt_patch_template pv_ops = {
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
+
+void (*paravirt_iret)(void) = native_iret;
#endif
EXPORT_SYMBOL(pv_ops);
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
deleted file mode 100644
index abd27ec67397..000000000000
--- a/arch/x86/kernel/paravirt_patch.c
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/stringify.h>
-
-#include <asm/paravirt.h>
-#include <asm/asm-offsets.h>
-
-#define PSTART(d, m) \
- patch_data_##d.m
-
-#define PEND(d, m) \
- (PSTART(d, m) + sizeof(patch_data_##d.m))
-
-#define PATCH(d, m, insn_buff, len) \
- paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m))
-
-#define PATCH_CASE(ops, m, data, insn_buff, len) \
- case PARAVIRT_PATCH(ops.m): \
- return PATCH(data, ops##_##m, insn_buff, len)
-
-#ifdef CONFIG_PARAVIRT_XXL
-struct patch_xxl {
- const unsigned char irq_irq_disable[1];
- const unsigned char irq_irq_enable[1];
- const unsigned char irq_save_fl[2];
- const unsigned char mmu_read_cr2[3];
- const unsigned char mmu_read_cr3[3];
- const unsigned char mmu_write_cr3[3];
- const unsigned char cpu_wbinvd[2];
- const unsigned char mov64[3];
-};
-
-static const struct patch_xxl patch_data_xxl = {
- .irq_irq_disable = { 0xfa }, // cli
- .irq_irq_enable = { 0xfb }, // sti
- .irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax
- .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
- .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
- .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
- .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
- .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
-};
-
-unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
-{
- return PATCH(xxl, mov64, insn_buff, len);
-}
-# endif /* CONFIG_PARAVIRT_XXL */
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-struct patch_lock {
- unsigned char queued_spin_unlock[3];
- unsigned char vcpu_is_preempted[2];
-};
-
-static const struct patch_lock patch_data_lock = {
- .vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax
-
-# ifdef CONFIG_X86_64
- .queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi)
-# else
- .queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax)
-# endif
-};
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
-
-unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
- unsigned int len)
-{
- switch (type) {
-
-#ifdef CONFIG_PARAVIRT_XXL
- PATCH_CASE(irq, save_fl, xxl, insn_buff, len);
- PATCH_CASE(irq, irq_enable, xxl, insn_buff, len);
- PATCH_CASE(irq, irq_disable, xxl, insn_buff, len);
-
- PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len);
- PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
- PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
-
- PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
-#endif
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
- case PARAVIRT_PATCH(lock.queued_spin_unlock):
- if (pv_is_native_spin_unlock())
- return PATCH(lock, queued_spin_unlock, insn_buff, len);
- break;
-
- case PARAVIRT_PATCH(lock.vcpu_is_preempted):
- if (pv_is_native_vcpu_is_preempted())
- return PATCH(lock, vcpu_is_preempted, insn_buff, len);
- break;
-#endif
- default:
- break;
- }
-
- return paravirt_patch_default(type, insn_buff, addr, len);
-}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9c214d7085a4..43cbfc84153a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -63,14 +63,9 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
*/
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
- /*
- * .sp1 is cpu_current_top_of_stack. The init task never
- * runs user code, but cpu_current_top_of_stack should still
- * be well defined before the first context switch.
- */
+#ifdef CONFIG_X86_32
.sp1 = TOP_OF_INIT_STACK,
-#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
#endif
@@ -451,7 +446,7 @@ void speculative_store_bypass_ht_init(void)
* First HT sibling to come up on the core. Link shared state of
* the first HT sibling to itself. The siblings on the same core
* which come up later will see the shared state pointer and link
- * themself to the state of this CPU.
+ * themselves to the state of this CPU.
*/
st->shared_state = st;
}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 11065dc03f5b..eda37df016f0 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -89,7 +89,7 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
/*
* Assumption here is that last_value, a global accumulator, always goes
* forward. If we are less than that, we should not be much smaller.
- * We assume there is an error marging we're inside, and then the correction
+ * We assume there is an error margin we're inside, and then the correction
* does not sacrifice accuracy.
*
* For reads: global may have changed between test and return,
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 94b33885f8d2..f469153eca8a 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -107,7 +107,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
* - Write protect disabled
* - No task switch
* - Don't do FP software emulation.
- * - Proctected mode enabled
+ * - Protected mode enabled
*/
movl %cr0, %eax
andl $~(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %eax
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index a4d9a261425b..c53271aebb64 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -121,7 +121,7 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
* - Write protect disabled
* - No task switch
* - Don't do FP software emulation.
- * - Proctected mode enabled
+ * - Protected mode enabled
*/
movq %cr0, %rax
andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ccab6cf91283..69757fac7462 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -65,7 +65,7 @@ RESERVE_BRK(dmi_alloc, 65536);
/*
* Range of the BSS area. The size of the BSS area is determined
- * at link time, with RESERVE_BRK*() facility reserving additional
+ * at link time, with RESERVE_BRK() facility reserving additional
* chunks.
*/
unsigned long _brk_start = (unsigned long)__brk_base;
@@ -633,11 +633,16 @@ static void __init trim_snb_memory(void)
printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");
/*
- * Reserve all memory below the 1 MB mark that has not
- * already been reserved.
+ * SandyBridge integrated graphics devices have a bug that prevents
+ * them from accessing certain memory ranges, namely anything below
+ * 1M and in the pages listed in bad_pages[] above.
+ *
+ * To avoid these pages being ever accessed by SNB gfx devices
+ * reserve all memory below the 1 MB mark and bad_pages that have
+ * not already been reserved at boot time.
*/
memblock_reserve(0, 1<<20);
-
+
for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
if (memblock_reserve(bad_pages[i], PAGE_SIZE))
printk(KERN_WARNING "failed to reserve 0x%08lx\n",
@@ -645,18 +650,6 @@ static void __init trim_snb_memory(void)
}
}
-/*
- * Here we put platform-specific memory range workarounds, i.e.
- * memory known to be corrupt or otherwise in need to be reserved on
- * specific platforms.
- *
- * If this gets used more widely it could use a real dispatch mechanism.
- */
-static void __init trim_platform_memory_ranges(void)
-{
- trim_snb_memory();
-}
-
static void __init trim_bios_range(void)
{
/*
@@ -725,11 +718,41 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
-static void __init trim_low_memory_range(void)
+static void __init early_reserve_memory(void)
{
+ /*
+ * Reserve the memory occupied by the kernel between _text and
+ * __end_of_kernel_reserve symbols. Any kernel sections after the
+ * __end_of_kernel_reserve symbol must be explicitly reserved with a
+ * separate memblock_reserve() or they will be discarded.
+ */
+ memblock_reserve(__pa_symbol(_text),
+ (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
+
+ /*
+ * The first 4Kb of memory is a BIOS owned area, but generally it is
+ * not listed as such in the E820 table.
+ *
+ * Reserve the first memory page and typically some additional
+ * memory (64KiB by default) since some BIOSes are known to corrupt
+ * low memory. See the Kconfig help text for X86_RESERVE_LOW.
+ *
+ * In addition, make sure page 0 is always reserved because on
+ * systems with L1TF its contents can be leaked to user processes.
+ */
memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+
+ early_reserve_initrd();
+
+ if (efi_enabled(EFI_BOOT))
+ efi_memblock_x86_reserve_range();
+
+ memblock_x86_reserve_range_setup_data();
+
+ reserve_ibft_region();
+ reserve_bios_regions();
}
-
+
/*
* Dump out kernel offset information on panic.
*/
@@ -764,29 +787,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
void __init setup_arch(char **cmdline_p)
{
- /*
- * Reserve the memory occupied by the kernel between _text and
- * __end_of_kernel_reserve symbols. Any kernel sections after the
- * __end_of_kernel_reserve symbol must be explicitly reserved with a
- * separate memblock_reserve() or they will be discarded.
- */
- memblock_reserve(__pa_symbol(_text),
- (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
-
- /*
- * Make sure page 0 is always reserved because on systems with
- * L1TF its contents can be leaked to user processes.
- */
- memblock_reserve(0, PAGE_SIZE);
-
- early_reserve_initrd();
-
- /*
- * At this point everything still needed from the boot loader
- * or BIOS or kernel text should be early reserved or marked not
- * RAM in e820. All other memory is free game.
- */
-
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -910,8 +910,18 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
- if (efi_enabled(EFI_BOOT))
- efi_memblock_x86_reserve_range();
+ /*
+ * Do some memory reservations *before* memory is added to
+ * memblock, so memblock allocations won't overwrite it.
+ * Do it after early param, so we could get (unlikely) panic from
+ * serial.
+ *
+ * After this point everything still needed from the boot loader or
+ * firmware or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+ early_reserve_memory();
+
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory used by the kernel cannot be hot-removed because Linux
@@ -938,9 +948,6 @@ void __init setup_arch(char **cmdline_p)
x86_report_nx();
- /* after early param, so could get panic from serial */
- memblock_x86_reserve_range_setup_data();
-
if (acpi_mps_check()) {
#ifdef CONFIG_X86_LOCAL_APIC
disable_apic = 1;
@@ -1032,14 +1039,12 @@ void __init setup_arch(char **cmdline_p)
*/
find_smp_config();
- reserve_ibft_region();
-
early_alloc_pgt_buf();
/*
* Need to conclude brk, before e820__memblock_setup()
- * it could use memblock_find_in_range, could overlap with
- * brk area.
+ * it could use memblock_find_in_range, could overlap with
+ * brk area.
*/
reserve_brk();
@@ -1054,8 +1059,6 @@ void __init setup_arch(char **cmdline_p)
*/
sev_setup_arch();
- reserve_bios_regions();
-
efi_fake_memmap();
efi_find_mirror();
efi_esrt_init();
@@ -1081,8 +1084,12 @@ void __init setup_arch(char **cmdline_p)
reserve_real_mode();
- trim_platform_memory_ranges();
- trim_low_memory_range();
+ /*
+ * Reserving memory causing GPU hangs on Sandy Bridge integrated
+ * graphics devices should be done after we allocated memory under
+ * 1M for the real mode trampoline.
+ */
+ trim_snb_memory();
init_mem_mapping();
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
index cdc04d091242..0aa9f13efd57 100644
--- a/arch/x86/kernel/sev-es-shared.c
+++ b/arch/x86/kernel/sev-es-shared.c
@@ -24,7 +24,7 @@ static bool __init sev_es_check_cpu_features(void)
return true;
}
-static void sev_es_terminate(unsigned int reason)
+static void __noreturn sev_es_terminate(unsigned int reason)
{
u64 val = GHCB_SEV_TERMINATE;
@@ -186,7 +186,6 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
* make it accessible to the hypervisor.
*
* In particular, check for:
- * - Hypervisor CPUID bit
* - Availability of CPUID leaf 0x8000001f
* - SEV CPUID bit.
*
@@ -194,10 +193,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
* can't be checked here.
*/
- if ((fn == 1 && !(regs->cx & BIT(31))))
- /* Hypervisor bit */
- goto fail;
- else if (fn == 0x80000000 && (regs->ax < 0x8000001f))
+ if (fn == 0x80000000 && (regs->ax < 0x8000001f))
/* SEV leaf check */
goto fail;
else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
@@ -210,12 +206,8 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
return;
fail:
- sev_es_wr_ghcb_msr(GHCB_SEV_TERMINATE);
- VMGEXIT();
-
- /* Shouldn't get here - if we do halt the machine */
- while (true)
- asm volatile("hlt\n");
+ /* Terminate the guest */
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
}
static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 04a780abb512..26f5479a97a8 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -137,29 +137,41 @@ static __always_inline bool on_vc_stack(struct pt_regs *regs)
}
/*
- * This function handles the case when an NMI is raised in the #VC exception
- * handler entry code. In this case, the IST entry for #VC must be adjusted, so
- * that any subsequent #VC exception will not overwrite the stack contents of the
- * interrupted #VC handler.
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
*
* The IST entry is adjusted unconditionally so that it can also be
- * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
- * sev_es_ist_exit() call may adjust back the IST entry too early.
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
*/
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
unsigned long old_ist, new_ist;
/* Read old IST entry */
- old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+ new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
- /* Make room on the IST stack */
+ /*
+ * If NMI happened while on the #VC IST stack, set the new IST
+ * value below regs->sp, so that the interrupted stack frame is
+ * not overwritten by subsequent #VC exceptions.
+ */
if (on_vc_stack(regs))
- new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
- else
- new_ist = old_ist - sizeof(old_ist);
+ new_ist = regs->sp;
- /* Store old IST entry */
+ /*
+ * Reserve additional 8 bytes and store old IST value so this
+ * adjustment can be unrolled in __sev_es_ist_exit().
+ */
+ new_ist -= sizeof(old_ist);
*(unsigned long *)new_ist = old_ist;
/* Set new IST entry */
@@ -277,7 +289,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
return ES_EXCEPTION;
}
- insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
+ insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
insn_get_length(&ctxt->insn);
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index f306e85a08a6..a06cb107c0e8 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -492,7 +492,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
* SS descriptor, but we do need SS to be valid. It's possible
* that the old SS is entirely bogus -- this can happen if the
* signal we're trying to deliver is #GP or #SS caused by a bad
- * SS value. We also have a compatbility issue here: DOSEMU
+ * SS value. We also have a compatibility issue here: DOSEMU
* relies on the contents of the SS register indicating the
* SS value at the time of the signal, even though that code in
* DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index eff4ce3b10da..06db901fabe8 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -67,7 +67,7 @@
* 5AP. symmetric IO mode (normal Linux operation) not affected.
* 'noapic' mode has vector 0xf filled out properly.
* 6AP. 'noapic' mode might be affected - fixed in later steppings
- * 7AP. We do not assume writes to the LVT deassering IRQs
+ * 7AP. We do not assume writes to the LVT deasserting IRQs
* 8AP. We do not enable low power mode (deep sleep) during MP bootup
* 9AP. We do not use mixed mode
*
@@ -204,7 +204,7 @@ static void native_stop_other_cpus(int wait)
}
/*
* Don't wait longer than 10 ms if the caller didn't
- * reqeust it. If wait is true, the machine hangs here if
+ * request it. If wait is true, the machine hangs here if
* one or more CPUs do not reach shutdown state.
*/
timeout = USEC_PER_MSEC * 10;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 16703c35a944..1e2050c4f94a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1407,7 +1407,7 @@ void __init calculate_max_logical_packages(void)
int ncpus;
/*
- * Today neither Intel nor AMD support heterogenous systems so
+ * Today neither Intel nor AMD support heterogeneous systems so
* extrapolate the boot cpu's data to all packages.
*/
ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 8627fda8d993..15b058eefc4e 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -29,12 +29,6 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
}
}
-/*
- * This function returns an error if it detects any unreliable features of the
- * stack. Otherwise it guarantees that the stack trace is reliable.
- *
- * If the task is not 'current', the caller *must* ensure the task is inactive.
- */
int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
void *cookie, struct task_struct *task)
{
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 653b7f617b61..8a56a6d80098 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -10,7 +10,7 @@
* EFI Quirks
* Several EFI systems do not correctly advertise their boot framebuffers.
* Hence, we use this static table of known broken machines and fix up the
- * information so framebuffer drivers can load corectly.
+ * information so framebuffer drivers can load correctly.
*/
#include <linux/dmi.h>
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 4c09ba110204..f9af561c3cd4 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -49,6 +49,30 @@ bool tboot_enabled(void)
return tboot != NULL;
}
+/* noinline to prevent gcc from warning about dereferencing constant fixaddr */
+static noinline __init bool check_tboot_version(void)
+{
+ if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
+ pr_warn("tboot at 0x%llx is invalid\n", boot_params.tboot_addr);
+ return false;
+ }
+
+ if (tboot->version < 5) {
+ pr_warn("tboot version is invalid: %u\n", tboot->version);
+ return false;
+ }
+
+ pr_info("found shared page at phys addr 0x%llx:\n",
+ boot_params.tboot_addr);
+ pr_debug("version: %d\n", tboot->version);
+ pr_debug("log_addr: 0x%08x\n", tboot->log_addr);
+ pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry);
+ pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base);
+ pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
+
+ return true;
+}
+
void __init tboot_probe(void)
{
/* Look for valid page-aligned address for shared page. */
@@ -66,25 +90,9 @@ void __init tboot_probe(void)
/* Map and check for tboot UUID. */
set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
- tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
- if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
- pr_warn("tboot at 0x%llx is invalid\n", boot_params.tboot_addr);
+ tboot = (void *)fix_to_virt(FIX_TBOOT_BASE);
+ if (!check_tboot_version())
tboot = NULL;
- return;
- }
- if (tboot->version < 5) {
- pr_warn("tboot version is invalid: %u\n", tboot->version);
- tboot = NULL;
- return;
- }
-
- pr_info("found shared page at phys addr 0x%llx:\n",
- boot_params.tboot_addr);
- pr_debug("version: %d\n", tboot->version);
- pr_debug("log_addr: 0x%08x\n", tboot->log_addr);
- pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry);
- pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base);
- pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
}
static pgd_t *tboot_pg_dir;
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index f5477eab5692..bd83748e2bde 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -113,7 +113,7 @@ int arch_register_cpu(int num)
* Two known BSP/CPU0 dependencies: Resume from suspend/hibernate
* depends on BSP. PIC interrupts depend on BSP.
*
- * If the BSP depencies are under control, one can tell kernel to
+ * If the BSP dependencies are under control, one can tell kernel to
* enable BSP hotplug. This basically adds a control file and
* one can attempt to offline BSP.
*/
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 651e3e508959..f577d07fbd43 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -395,7 +395,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
/*
* Adjust our frame so that we return straight to the #GP
* vector with the expected RSP value. This is safe because
- * we won't enable interupts or schedule before we invoke
+ * we won't enable interrupts or schedule before we invoke
* general_protection, so nothing will clobber the stack
* frame we just set up.
*
@@ -978,6 +978,10 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
goto out_irq;
}
+ /* #DB for bus lock can only be triggered from userspace. */
+ if (dr6 & DR_BUS_LOCK)
+ handle_bus_lock(regs);
+
/* Add the virtual_dr6 bits for signals. */
dr6 |= current->thread.virtual_dr6;
if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc2771f..57ec01192180 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>
+#include <linux/static_call.h>
#include <asm/hpet.h>
#include <asm/timer.h>
@@ -254,7 +255,7 @@ unsigned long long sched_clock(void)
bool using_native_sched_clock(void)
{
- return pv_ops.time.sched_clock == native_sched_clock;
+ return static_call_query(pv_sched_clock) == native_sched_clock;
}
#else
unsigned long long
@@ -739,7 +740,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
* 2) Reference counter. If available we use the HPET or the
* PMTIMER as a reference to check the sanity of that value.
* We use separate TSC readouts and check inside of the
- * reference read for any possible disturbance. We dicard
+ * reference read for any possible disturbance. We discard
* disturbed values here as well. We do that around the PIT
* calibration delay loop as we have to wait for a certain
* amount of time anyway.
@@ -1079,7 +1080,7 @@ static void tsc_resume(struct clocksource *cs)
* very small window right after one CPU updated cycle_last under
* xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
* is smaller than the cycle_last reference value due to a TSC which
- * is slighty behind. This delta is nowhere else observable, but in
+ * is slightly behind. This delta is nowhere else observable, but in
* that case it results in a forward time jump in the range of hours
* due to the unsigned delta calculation of the time keeping core
* code, which is necessary to support wrapping clocksources like pm
@@ -1264,7 +1265,7 @@ EXPORT_SYMBOL(convert_art_to_tsc);
* corresponding clocksource
* @cycles: System counter value
* @cs: Clocksource corresponding to system counter value. Used
- * by timekeeping code to verify comparibility of two cycle
+ * by timekeeping code to verify comparability of two cycle
* values.
*/
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 3d3c761eb74a..50a4515fe0ad 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -472,7 +472,7 @@ retry:
/*
* Add the result to the previous adjustment value.
*
- * The adjustement value is slightly off by the overhead of the
+ * The adjustment value is slightly off by the overhead of the
* sync mechanism (observed values are ~200 TSC cycles), but this
* really depends on CPU, node distance and frequency. So
* compensating for this is hard to get right. Experiments show
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f6225bf22c02..fac1daae7994 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -272,7 +272,7 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
* by whether the operand is a register or a memory location.
* If operand is a register, return as many bytes as the operand
* size. If operand is memory, return only the two least
- * siginificant bytes.
+ * significant bytes.
*/
if (X86_MODRM_MOD(insn->modrm.value) == 3)
*data_size = insn->opnd_bytes;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a788d5120d4d..f6b93a35ce14 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -84,6 +84,18 @@ config KVM_INTEL
To compile this as a module, choose M here: the module
will be called kvm-intel.
+config X86_SGX_KVM
+ bool "Software Guard eXtensions (SGX) Virtualization"
+ depends on X86_SGX && KVM_INTEL
+ help
+
+ Enables KVM guests to create SGX enclaves.
+
+ This includes support to expose "raw" unreclaimable enclave memory to
+ guests via a device node, e.g. /dev/sgx_vepc.
+
+ If unsure, say N.
+
config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6bd2f8b830e4..c02466a1410b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1033,7 +1033,7 @@ EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
* - Centaur: 0xc0000000 - 0xcfffffff
*
* The Hypervisor class is further subdivided into sub-classes that each act as
- * their own indepdent class associated with a 0x100 byte range. E.g. if Qemu
+ * their own independent class associated with a 0x100 byte range. E.g. if Qemu
* is advertising support for both HyperV and KVM, the resulting Hypervisor
* CPUID sub-classes are:
*
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f7970ba6219f..cdd2a2b6550e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3222,7 +3222,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
}
/*
- * Now load segment descriptors. If fault happenes at this stage
+ * Now load segment descriptors. If fault happens at this stage
* it is handled in a context of new task
*/
ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8a4de3f12820..d5b72a08e566 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -269,7 +269,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue)
{
/* We can't check irqchip_in_kernel() here as some callers are
- * currently inititalizing the irqchip. Other callers should therefore
+ * currently initializing the irqchip. Other callers should therefore
* check kvm_arch_can_set_irq_routing() before calling this function.
*/
switch (ue->type) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 951dae4e7175..62b1729277ef 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4961,7 +4961,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
/*
* No need to care whether allocation memory is successful
- * or not since pte prefetch is skiped if it does not have
+ * or not since pte prefetch is skipped if it does not have
* enough objects in the cache.
*/
mmu_topup_memory_caches(vcpu, true);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 1f6f98c76bdf..360983865398 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -59,7 +59,7 @@ struct kvm_mmu_page {
#ifdef CONFIG_X86_64
bool tdp_mmu_page;
- /* Used for freeing the page asyncronously if it is a TDP MMU page. */
+ /* Used for freeing the page asynchronously if it is a TDP MMU page. */
struct rcu_head rcu_head;
#endif
};
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 018d82e73e31..34207b874886 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -404,7 +404,7 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
* If this warning were to trigger it would indicate that there was a
* missing MMU notifier or a race with some notifier handler.
* A present, leaf SPTE should never be directly replaced with another
- * present leaf SPTE pointing to a differnt PFN. A notifier handler
+ * present leaf SPTE pointing to a different PFN. A notifier handler
* should be zapping the SPTE before the main MM's page table is
* changed, or the SPTE should be zeroed, and the TLBs flushed by the
* thread before replacement.
@@ -418,7 +418,7 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
/*
* Crash the host to prevent error propagation and guest data
- * courruption.
+ * corruption.
*/
BUG();
}
@@ -529,7 +529,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
/*
* No other thread can overwrite the removed SPTE as they
* must either wait on the MMU lock or use
- * tdp_mmu_set_spte_atomic which will not overrite the
+ * tdp_mmu_set_spte_atomic which will not overwrite the
* special removed SPTE value. No bookkeeping is needed
* here since the SPTE is going from non-present
* to non-present.
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 7b30bc967af3..67e753edfa22 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -103,7 +103,7 @@ static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
/* returns general purpose PMC with the specified MSR. Note that it can be
* used for both PERFCTRn and EVNTSELn; that is why it accepts base as a
- * paramenter to tell them apart.
+ * parameter to tell them apart.
*/
static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
u32 base)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 78bdcfac4e40..3e55674098be 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -727,7 +727,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
struct amd_svm_iommu_ir *ir;
/**
- * In some cases, the existing irte is updaed and re-set,
+ * In some cases, the existing irte is updated and re-set,
* so we need to check here if it's already been added
* to the ir_list.
*/
@@ -838,7 +838,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* Here, we setup with legacy mode in the following cases:
* 1. When cannot target interrupt to a specific vcpu.
* 2. Unsetting posted interrupt.
- * 3. APIC virtialization is disabled for the vcpu.
+ * 3. APIC virtualization is disabled for the vcpu.
* 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
*/
if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 874ea309279f..2b27a9452403 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2082,7 +2082,7 @@ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- /* PKRU is restored on VMEXIT, save the curent host value */
+ /* PKRU is restored on VMEXIT, save the current host value */
hostsa->pkru = read_pkru();
/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 58a45bb139f8..6dad89248312 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4400,7 +4400,7 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
*
* This happens because CPU microcode reading instruction bytes
* uses a special opcode which attempts to read data using CPL=0
- * priviledges. The microcode reads CS:RIP and if it hits a SMAP
+ * privileges. The microcode reads CS:RIP and if it hits a SMAP
* fault, it gives up and returns no instruction bytes.
*
* Detection:
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bcca0b80e0d0..1e069aac7410 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3537,7 +3537,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* snapshot restore (migration).
*
* In this flow, it is assumed that vmcs12 cache was
- * trasferred as part of captured nVMX state and should
+ * transferred as part of captured nVMX state and should
* therefore not be read from guest memory (which may not
* exist on destination host yet).
*/
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 4831bc44ce66..459748680daf 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -10,7 +10,7 @@
#include "vmx.h"
/*
- * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
* can find which vCPU should be woken up.
*/
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 29b40e092d13..bcbf0d2139e9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1529,7 +1529,7 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
/*
* MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that
- * utilize encodings marked reserved will casue a #GP fault.
+ * utilize encodings marked reserved will cause a #GP fault.
*/
value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
@@ -2761,7 +2761,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
- * Update real mode segment cache. It may be not up-to-date if sement
+ * Update real mode segment cache. It may be not up-to-date if segment
* register was written while vcpu was in a guest mode.
*/
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
@@ -7252,7 +7252,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;
- /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */
+ /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabricEn can be set */
if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eca63625aee4..efc7a82ab140 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -156,9 +156,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
/*
* lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
- * adaptive tuning starting from default advancment of 1000ns. '0' disables
+ * adaptive tuning starting from default advancement of 1000ns. '0' disables
* advancement entirely. Any other value is used as-is and disables adaptive
- * tuning, i.e. allows priveleged userspace to set an exact advancement time.
+ * tuning, i.e. allows privileged userspace to set an exact advancement time.
*/
static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
@@ -1287,7 +1287,7 @@ static const u32 emulated_msrs_all[] = {
MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
MSR_IA32_TSC_ADJUST,
- MSR_IA32_TSCDEADLINE,
+ MSR_IA32_TSC_DEADLINE,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
MSR_IA32_MISC_ENABLE,
@@ -1372,7 +1372,7 @@ static u64 kvm_get_arch_capabilities(void)
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
* the nested hypervisor runs with NX huge pages. If it is not,
- * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other
+ * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
* L1 guests, so it need not worry about its own (L2) guests.
*/
data |= ARCH_CAP_PSCHANGE_MC_NO;
@@ -1849,7 +1849,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
ret = EXIT_FASTPATH_EXIT_HANDLED;
}
break;
- case MSR_IA32_TSCDEADLINE:
+ case MSR_IA32_TSC_DEADLINE:
data = kvm_read_edx_eax(vcpu);
if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
kvm_skip_emulated_instruction(vcpu);
@@ -3087,7 +3087,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_set_apic_base(vcpu, msr_info);
case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_write(vcpu, msr, data);
- case MSR_IA32_TSCDEADLINE:
+ case MSR_IA32_TSC_DEADLINE:
kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
case MSR_IA32_TSC_ADJUST:
@@ -3449,7 +3449,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
- case MSR_IA32_TSCDEADLINE:
+ case MSR_IA32_TSC_DEADLINE:
msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
break;
case MSR_IA32_TSC_ADJUST:
@@ -4025,7 +4025,6 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
struct kvm_host_map map;
struct kvm_steal_time *st;
- int idx;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -4033,15 +4032,9 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (vcpu->arch.st.preempted)
return;
- /*
- * Take the srcu lock as memslots will be accessed to check the gfn
- * cache generation against the memslots generation.
- */
- idx = srcu_read_lock(&vcpu->kvm->srcu);
-
if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
&vcpu->arch.st.cache, true))
- goto out;
+ return;
st = map.hva +
offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
@@ -4049,20 +4042,25 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
-
-out:
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ int idx;
+
if (vcpu->preempted && !vcpu->arch.guest_state_protected)
vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+ /*
+ * Take the srcu lock as memslots will be accessed to check the gfn
+ * cache generation against the memslots generation.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvm_xen_msr_enabled(vcpu->kvm))
kvm_xen_runstate_set_preempted(vcpu);
else
kvm_steal_time_set_preempted(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 3b6544111ac9..16bc9130e7a5 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -6,7 +6,7 @@
*/
#include <linux/linkage.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
/* if you want SMP support, implement these with real spinlocks */
.macro LOCK reg
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 1c5c81c16b06..ce6935690766 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -6,7 +6,7 @@
*/
#include <linux/linkage.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
.macro read64 reg
movl %ebx, %eax
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 2402d4c489d2..db4b4f9197c7 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -3,7 +3,7 @@
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/export.h>
/*
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 77b9b2a3b5c8..57b79c577496 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -11,7 +11,7 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index bb0b3fe1e0a0..2bf07e18e38c 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -232,7 +232,7 @@ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off)
* resolve_seg_reg() - obtain segment register index
* @insn: Instruction with operands
* @regs: Register values as seen when entering kernel mode
- * @regoff: Operand offset, in pt_regs, used to deterimine segment register
+ * @regoff: Operand offset, in pt_regs, used to determine segment register
*
* Determine the segment register associated with the operands and, if
* applicable, prefixes and the instruction pointed by @insn.
@@ -517,7 +517,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
* @insn: Instruction containing ModRM byte
* @regs: Register values as seen when entering kernel mode
* @offs1: Offset of the first operand register
- * @offs2: Offset of the second opeand register, if applicable
+ * @offs2: Offset of the second operand register, if applicable
*
* Obtain the offset, in pt_regs, of the registers indicated by the ModRM byte
* in @insn. This function is to be used with 16-bit address encodings. The
@@ -576,7 +576,7 @@ static int get_reg_offset_16(struct insn *insn, struct pt_regs *regs,
* If ModRM.mod is 0 and ModRM.rm is 110b, then we use displacement-
* only addressing. This means that no registers are involved in
* computing the effective address. Thus, ensure that the first
- * register offset is invalild. The second register offset is already
+ * register offset is invalid. The second register offset is already
* invalid under the aforementioned conditions.
*/
if ((X86_MODRM_MOD(insn->modrm.value) == 0) &&
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 1e299ac73c86..1cc9da6e29c7 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,7 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/export.h>
.pushsection .noinstr.text, "ax"
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 41902fe8b859..64801010d312 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,7 +8,7 @@
*/
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/export.h>
#undef memmove
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 0bfd26e4ca9e..9827ae267f96 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -3,7 +3,7 @@
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/export.h>
/*
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 419365c48b2a..cc5f4ea943d3 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -14,7 +14,7 @@
* tested so far for any MMX solution figured.
*
* 22/09/2000 - Arjan van de Ven
- * Improved for non-egineering-sample Athlons
+ * Improved for non-engineering-sample Athlons
*
*/
#include <linux/hardirq.h>
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 75a0915b0d01..40bbe56bde32 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -252,7 +252,7 @@ static void __wrmsr_safe_regs_on_cpu(void *info)
rv->err = wrmsr_safe_regs(rv->regs);
}
-int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
int err;
struct msr_regs_info rv;
@@ -265,7 +265,7 @@ int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
}
EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
-int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
int err;
struct msr_regs_info rv;
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 3bd905e10ee2..b09cd2ad426c 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(msrs_free);
* argument @m.
*
*/
-int msr_read(u32 msr, struct msr *m)
+static int msr_read(u32 msr, struct msr *m)
{
int err;
u64 val;
@@ -54,7 +54,7 @@ int msr_read(u32 msr, struct msr *m)
* @msr: MSR to write
* @m: value to write
*/
-int msr_write(u32 msr, struct msr *m)
+static int msr_write(u32 msr, struct msr *m)
{
return wrmsrl_safe(msr, m->q);
}
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index f6fb1d218dcc..6bb74b5c238c 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -4,7 +4,7 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c
index 4a9887851ad8..990d847ae902 100644
--- a/arch/x86/math-emu/fpu_trig.c
+++ b/arch/x86/math-emu/fpu_trig.c
@@ -547,7 +547,7 @@ static void frndint_(FPU_REG *st0_ptr, u_char st0_tag)
single_arg_error(st0_ptr, st0_tag);
}
-static int fsin(FPU_REG *st0_ptr, u_char tag)
+static int f_sin(FPU_REG *st0_ptr, u_char tag)
{
u_char arg_sign = getsign(st0_ptr);
@@ -608,6 +608,11 @@ static int fsin(FPU_REG *st0_ptr, u_char tag)
}
}
+static void fsin(FPU_REG *st0_ptr, u_char tag)
+{
+ f_sin(st0_ptr, tag);
+}
+
static int f_cos(FPU_REG *st0_ptr, u_char tag)
{
u_char st0_sign;
@@ -724,7 +729,7 @@ static void fsincos(FPU_REG *st0_ptr, u_char st0_tag)
}
reg_copy(st0_ptr, &arg);
- if (!fsin(st0_ptr, st0_tag)) {
+ if (!f_sin(st0_ptr, st0_tag)) {
push();
FPU_copy_to_reg0(&arg, st0_tag);
f_cos(&st(0), st0_tag);
@@ -1635,7 +1640,7 @@ void FPU_triga(void)
}
static FUNC_ST0 const trig_table_b[] = {
- fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0) fsin, fcos
+ fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, fsin, fcos
};
void FPU_trigb(void)
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index fe6246ff9887..7ca6417c0c8d 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -964,7 +964,7 @@ int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d)
/* The return value (in eax) is zero if the result is exact,
if bits are changed due to rounding, truncation, etc, then
a non-zero value is returned */
-/* Overflow is signalled by a non-zero return value (in eax).
+/* Overflow is signaled by a non-zero return value (in eax).
In the case of overflow, the returned significand always has the
largest possible value */
int FPU_round_to_int(FPU_REG *r, u_char tag)
diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S
index 11a1f798451b..4a9fc3cc5a4d 100644
--- a/arch/x86/math-emu/reg_round.S
+++ b/arch/x86/math-emu/reg_round.S
@@ -575,7 +575,7 @@ Normalise_result:
#ifdef PECULIAR_486
/*
* This implements a special feature of 80486 behaviour.
- * Underflow will be signalled even if the number is
+ * Underflow will be signaled even if the number is
* not a denormal after rounding.
* This difference occurs only for masked underflow, and not
* in the unmasked case.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a73347e2cdfc..1c548ad00752 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1497,7 +1497,7 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
* userspace task is trying to access some valid (from guest's point of
* view) memory which is not currently mapped by the host (e.g. the
* memory is swapped out). Note, the corresponding "page ready" event
- * which is injected when the memory becomes available, is delived via
+ * which is injected when the memory becomes available, is delivered via
* an interrupt mechanism and not a #PF exception
* (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
*
@@ -1523,7 +1523,7 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
*
* In case the fault hit a RCU idle region the conditional entry
* code reenabled RCU to avoid subsequent wreckage which helps
- * debugability.
+ * debuggability.
*/
state = irqentry_enter(regs);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index dd694fb93916..fbf41dd142ca 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -29,7 +29,7 @@
/*
* We need to define the tracepoints somewhere, and tlb.c
- * is only compied when SMP=y.
+ * is only compiled when SMP=y.
*/
#define CREATE_TRACE_POINTS
#include <trace/events/tlb.h>
@@ -756,7 +756,7 @@ void __init init_mem_mapping(void)
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
- /* can we preseve max_low_pfn ?*/
+ /* can we preserve max_low_pfn ?*/
max_low_pfn = max_pfn;
}
#else
@@ -939,7 +939,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
{
/*
* end could be not aligned, and We can not align that,
- * decompresser could be confused by aligned initrd_end
+ * decompressor could be confused by aligned initrd_end
* We already reserve the end partial page before in
* - i386_start_kernel()
* - x86_64_start_kernel()
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b5a3fa4033d3..55247451ba85 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -172,7 +172,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
/*
* With folded p4d, pgd_none() is always false, we need to
- * handle synchonization on p4d level.
+ * handle synchronization on p4d level.
*/
MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, addr);
@@ -986,7 +986,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
/*
* Do not free direct mapping pages since they were
- * freed when offlining, or simplely not in use.
+ * freed when offlining, or simply not in use.
*/
if (!direct)
free_pagetable(pte_page(*pte), 0);
@@ -1004,7 +1004,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
*
* If we are not removing the whole page, it means
* other page structs in this page are being used and
- * we canot remove them. So fill the unused page_structs
+ * we cannot remove them. So fill the unused page_structs
* with 0xFD, and remove the page when it is wholly
* filled with 0xFD.
*/
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 6e6b39710e5f..557f0fe25dff 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -96,7 +96,7 @@ void __init kernel_randomize_memory(void)
memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
- /* Adapt phyiscal memory region size based on available memory */
+ /* Adapt physical memory region size based on available memory */
if (memory_tb < kaslr_regions[0].size_tb)
kaslr_regions[0].size_tb = memory_tb;
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index be020a7bc414..d3efbc5b3449 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Support for MMIO probes.
- * Benfit many code from kprobes
+ * Benefit many code from kprobes
* (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
* 2007 Alexander Eichner
* 2008 Pekka Paalanen <pq@iki.fi>
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index ae78cef79980..f633f9e23b8f 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
+#include <linux/virtio_config.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
@@ -484,3 +485,8 @@ void __init mem_encrypt_init(void)
print_mem_encrypt_feature_info();
}
+int arch_has_restricted_virtio_memory_access(void)
+{
+ return sev_active();
+}
+EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access);
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 7a84fc8bc5c3..17d292b7072f 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -27,7 +27,7 @@ SYM_FUNC_START(sme_encrypt_execute)
* - stack page (PAGE_SIZE)
* - encryption routine page (PAGE_SIZE)
* - intermediate copy buffer (PMD_PAGE_SIZE)
- * R8 - physcial address of the pagetables to use for encryption
+ * R8 - physical address of the pagetables to use for encryption
*/
push %rbp
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 6c5eb6f3f14f..a19374d26101 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -503,14 +503,10 @@ void __init sme_enable(struct boot_params *bp)
#define AMD_SME_BIT BIT(0)
#define AMD_SEV_BIT BIT(1)
- /*
- * Set the feature mask (SME or SEV) based on whether we are
- * running under a hypervisor.
- */
- eax = 1;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
+
+ /* Check the SEV MSR to determine whether SEV or SME is enabled */
+ sev_status = __rdmsr(MSR_AMD64_SEV);
+ feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
/*
* Check for the SME/SEV feature:
@@ -530,19 +526,26 @@ void __init sme_enable(struct boot_params *bp)
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
+ /*
+ * No SME if Hypervisor bit is set. This check is here to
+ * prevent a guest from trying to enable SME. For running as a
+ * KVM guest the MSR_K8_SYSCFG will be sufficient, but there
+ * might be other hypervisors which emulate that MSR as non-zero
+ * or even pass it through to the guest.
+ * A malicious hypervisor can still trick a guest into this
+ * path, but there is no way to protect against that.
+ */
+ eax = 1;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (ecx & BIT(31))
+ return;
+
/* For SME, check the SYSCFG MSR */
msr = __rdmsr(MSR_K8_SYSCFG);
if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
return;
} else {
- /* For SEV, check the SEV MSR */
- msr = __rdmsr(MSR_AMD64_SEV);
- if (!(msr & MSR_AMD64_SEV_ENABLED))
- return;
-
- /* Save SEV_STATUS to avoid reading MSR again */
- sev_status = msr;
-
/* SEV state cannot be controlled by a command line option */
sme_me_mask = me_mask;
sev_enabled = true;
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index ca311aaa67b8..3112ca7786ed 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -695,7 +695,7 @@ int memtype_free(u64 start, u64 end)
/**
- * lookup_memtype - Looksup the memory type for a physical address
+ * lookup_memtype - Looks up the memory type for a physical address
* @paddr: physical address of which memory type needs to be looked up
*
* Only to be called when PAT is enabled
@@ -800,6 +800,7 @@ void memtype_free_io(resource_size_t start, resource_size_t end)
memtype_free(start, end);
}
+#ifdef CONFIG_X86_PAT
int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
{
enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
@@ -813,6 +814,7 @@ void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
memtype_free_io(start, start + size);
}
EXPORT_SYMBOL(arch_io_free_memtype_wc);
+#endif
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 16f878c26667..427980617557 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -680,7 +680,7 @@ pmd_t *lookup_pmd_address(unsigned long address)
* end up in this kind of memory, for instance.
*
* This could be optimized, but it is only intended to be
- * used at inititalization time, and keeping it
+ * used at initialization time, and keeping it
* unoptimized should increase the testing coverage for
* the more obscure platforms.
*/
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 8873ed1438a9..a2332eef66e9 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -128,7 +128,7 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
/*
* Called from the FPU code when creating a fresh set of FPU
* registers. This is called from a very specific context where
- * we know the FPU regstiers are safe for use and we can use PKRU
+ * we know the FPU registers are safe for use and we can use PKRU
* directly.
*/
void copy_init_pkru_to_fpregs(void)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 1aab92930569..5d5c7bb50ce9 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -361,7 +361,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
* global, so set it as global in both copies. Note:
* the X86_FEATURE_PGE check is not _required_ because
* the CPU ignores _PAGE_GLOBAL when PGE is not
- * supported. The check keeps consistentency with
+ * supported. The check keeps consistency with
* code that only set this bit when supported.
*/
if (boot_cpu_has(X86_FEATURE_PGE))
@@ -440,10 +440,9 @@ static void __init pti_clone_user_shared(void)
for_each_possible_cpu(cpu) {
/*
- * The SYSCALL64 entry code needs to be able to find the
- * thread stack and needs one word of scratch space in which
- * to spill a register. All of this lives in the TSS, in
- * the sp1 and sp2 slots.
+ * The SYSCALL64 entry code needs one word of scratch space
+ * in which to spill a register. It lives in the sp2 slot
+ * of the CPU's TSS.
*
* This is done for all possible CPUs during boot to ensure
* that it's propagated to all mms.
@@ -512,7 +511,7 @@ static void pti_clone_entry_text(void)
static inline bool pti_kernel_image_global_ok(void)
{
/*
- * Systems with PCIDs get litlle benefit from global
+ * Systems with PCIDs get little benefit from global
* kernel text and are not worth the downsides.
*/
if (cpu_feature_enabled(X86_FEATURE_PCID))
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 569ac1d57f55..98f269560d40 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -106,7 +106,7 @@ static inline u16 kern_pcid(u16 asid)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
- * Make sure that the dynamic ASID space does not confict with the
+ * Make sure that the dynamic ASID space does not conflict with the
* bit we are using to switch between user and kernel ASIDs.
*/
BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
@@ -736,7 +736,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
* 3, we'd be break the invariant: we'd update local_tlb_gen above
* 1 without the full flush that's needed for tlb_gen 2.
*
- * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimiation.
+ * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization.
* Partial TLB flushes are not all that much cheaper than full TLB
* flushes, so it seems unlikely that it would be a performance win
* to do a partial flush if that won't bring our TLB fully up to
@@ -876,7 +876,7 @@ static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
static inline void put_flush_tlb_info(void)
{
#ifdef CONFIG_DEBUG_VM
- /* Complete reentrency prevention checks */
+ /* Complete reentrancy prevention checks */
barrier();
this_cpu_dec(flush_tlb_info_idx);
#endif
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7f1b3a862e14..220e72434f3c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1556,7 +1556,7 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
if (is_imm8(jmp_offset)) {
if (jmp_padding) {
/* To keep the jmp_offset valid, the extra bytes are
- * padded before the jump insn, so we substract the
+ * padded before the jump insn, so we subtract the
* 2 bytes of jmp_cond insn from INSN_SZ_DIFF.
*
* If the previous pass already emits an imm8
@@ -1631,7 +1631,7 @@ emit_jmp:
if (jmp_padding) {
/* To avoid breaking jmp_offset, the extra bytes
* are padded before the actual jmp insn, so
- * 2 bytes is substracted from INSN_SZ_DIFF.
+ * 2 bytes is subtracted from INSN_SZ_DIFF.
*
* If the previous pass already emits an imm8
* jmp, there is nothing to pad (0 byte).
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 0a0e168be1cb..02dc64625e64 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -375,7 +375,7 @@ static const struct dmi_system_id msi_k8t_dmi_table[] = {
* The BIOS only gives options "DISABLED" and "AUTO". This code sets
* the corresponding register-value to enable the soundcard.
*
- * The soundcard is only enabled, if the mainborad is identified
+ * The soundcard is only enabled, if the mainboard is identified
* via DMI-tables and the soundcard is detected to be off.
*/
static void pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 1b82d77019b1..df7b5477fc4f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -195,7 +195,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
}
/*
- * Certain firmware versions are way too sentimential and still believe
+ * Certain firmware versions are way too sentimental and still believe
* they are exclusive and unquestionable owners of the first physical page,
* even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY
* (but then write-access it later during SetVirtualAddressMap()).
@@ -457,7 +457,7 @@ void __init efi_dump_pagetable(void)
* in a kernel thread and user context. Preemption needs to remain disabled
* while the EFI-mm is borrowed. mmgrab()/mmdrop() is not used because the mm
* can not change under us.
- * It should be ensured that there are no concurent calls to this function.
+ * It should be ensured that there are no concurrent calls to this function.
*/
void efi_enter_mm(void)
{
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 67d93a243c35..7850111008a8 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -441,7 +441,7 @@ void __init efi_free_boot_services(void)
* 1.4.4 with SGX enabled booting Linux via Fedora 24's
* grub2-efi on a hard disk. (And no, I don't know why
* this happened, but Linux should still try to boot rather
- * panicing early.)
+ * than panicking early.)
*/
rm_size = real_mode_size_needed();
if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
@@ -726,7 +726,7 @@ void efi_crash_gracefully_on_page_fault(unsigned long phys_addr)
* Buggy efi_reset_system() is handled differently from other EFI
* Runtime Services as it doesn't use efi_rts_wq. Although,
* native_machine_emergency_restart() says that machine_real_restart()
- * could fail, it's better not to compilcate this fault handler
+ * could fail, it's better not to complicate this fault handler
* because this case occurs *very* rarely and hence could be improved
* on a need by basis.
*/
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index 0286fe1b14b5..d3d456925b2a 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* imr.c -- Intel Isolated Memory Region driver
*
* Copyright(c) 2013 Intel Corporation.
@@ -551,7 +551,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
/*
* Setup an unlocked IMR around the physical extent of the kernel
- * from the beginning of the .text secton to the end of the
+ * from the beginning of the .text section to the end of the
* .rodata section as one physically contiguous block.
*
* We don't round up @size since it is already PAGE_SIZE aligned.
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
index 570e3062faac..761f3689f60a 100644
--- a/arch/x86/platform/intel-quark/imr_selftest.c
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* imr_selftest.c -- Intel Isolated Memory Region self-test driver
*
* Copyright(c) 2013 Intel Corporation.
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index 526f70f27c1c..fdd49d70b437 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -187,7 +187,7 @@ bool iosf_mbi_available(void)
EXPORT_SYMBOL(iosf_mbi_available);
/*
- **************** P-Unit/kernel shared I2C bus arbritration ****************
+ **************** P-Unit/kernel shared I2C bus arbitration ****************
*
* Some Bay Trail and Cherry Trail devices have the P-Unit and us (the kernel)
* share a single I2C bus to the PMIC. Below are helpers to arbitrate the
@@ -493,7 +493,7 @@ static void iosf_sideband_debug_init(void)
/* mcrx */
debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx);
- /* mcr - initiates mailbox tranaction */
+ /* mcr - initiates mailbox transaction */
debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops);
}
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 85f4638764d6..994a229cb79f 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -27,7 +27,7 @@ static bool lid_wake_on_close;
* wake-on-close. This is implemented as standard by the XO-1.5 DSDT.
*
* We provide here a sysfs attribute that will additionally enable
- * wake-on-close behavior. This is useful (e.g.) when we oportunistically
+ * wake-on-close behavior. This is useful (e.g.) when we opportunistically
* suspend with the display running; if the lid is then closed, we want to
* wake up to turn the display off.
*
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 26d1f6693789..75e3319e8bee 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -131,7 +131,7 @@ void * __init prom_early_alloc(unsigned long size)
const size_t chunk_size = max(PAGE_SIZE, size);
/*
- * To mimimize the number of allocations, grab at least
+ * To minimize the number of allocations, grab at least
* PAGE_SIZE of memory (that's an arbitrary choice that's
* fast enough on the platforms we care about while minimizing
* wasted bootmem) and hand off chunks of it to callers.
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index d2ccadc247e6..66b317398b8a 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -30,10 +30,10 @@
* the boot start info structure.
* - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared.
* - `cr4`: all bits are cleared.
- * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’
- * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified.
+ * - `cs `: must be a 32-bit read/execute code segment with a base of `0`
+ * and a limit of `0xFFFFFFFF`. The selector value is unspecified.
* - `ds`, `es`: must be a 32-bit read/write data segment with a base of
- * ‘0’ and a limit of ‘0xFFFFFFFF’. The selector values are all
+ * `0` and a limit of `0xFFFFFFFF`. The selector values are all
* unspecified.
* - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit
* of '0x67'.
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index eafc530c8767..1e9ff28bc2e0 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -24,6 +24,7 @@
#include <asm/kdebug.h>
#include <asm/local64.h>
#include <asm/nmi.h>
+#include <asm/reboot.h>
#include <asm/traps.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_hub.h>
@@ -91,6 +92,8 @@ static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
static atomic_t uv_nmi_slave_continue;
static cpumask_var_t uv_nmi_cpu_mask;
+static atomic_t uv_nmi_kexec_failed;
+
/* Values for uv_nmi_slave_continue */
#define SLAVE_CLEAR 0
#define SLAVE_CONTINUE 1
@@ -834,38 +837,35 @@ static void uv_nmi_touch_watchdogs(void)
touch_nmi_watchdog();
}
-static atomic_t uv_nmi_kexec_failed;
-
-#if defined(CONFIG_KEXEC_CORE)
-static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
+static void uv_nmi_kdump(int cpu, int main, struct pt_regs *regs)
{
+ /* Check if kdump kernel loaded for both main and secondary CPUs */
+ if (!kexec_crash_image) {
+ if (main)
+ pr_err("UV: NMI error: kdump kernel not loaded\n");
+ return;
+ }
+
/* Call crash to dump system state */
- if (master) {
+ if (main) {
pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
crash_kexec(regs);
- pr_emerg("UV: crash_kexec unexpectedly returned, ");
+ pr_emerg("UV: crash_kexec unexpectedly returned\n");
atomic_set(&uv_nmi_kexec_failed, 1);
- if (!kexec_crash_image) {
- pr_cont("crash kernel not loaded\n");
- return;
- }
- pr_cont("kexec busy, stalling cpus while waiting\n");
- }
- /* If crash exec fails the slaves should return, otherwise stall */
- while (atomic_read(&uv_nmi_kexec_failed) == 0)
- mdelay(10);
-}
+ } else { /* secondary */
-#else /* !CONFIG_KEXEC_CORE */
-static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
-{
- if (master)
- pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
- atomic_set(&uv_nmi_kexec_failed, 1);
+ /* If kdump kernel fails, secondaries will exit this loop */
+ while (atomic_read(&uv_nmi_kexec_failed) == 0) {
+
+ /* Once the CPU shootdown starts, the CPUs do not return */
+ run_crash_ipi_callback(regs);
+
+ mdelay(10);
+ }
+ }
}
-#endif /* !CONFIG_KEXEC_CORE */
#ifdef CONFIG_KGDB
#ifdef CONFIG_KGDB_KDB
@@ -889,7 +889,7 @@ static inline int uv_nmi_kdb_reason(void)
* Call KGDB/KDB from NMI handler
*
* Note that if both KGDB and KDB are configured, then the action of 'kgdb' or
- * 'kdb' has no affect on which is used. See the KGDB documention for further
+ * 'kdb' has no effect on which is used. See the KGDB documentation for further
* information.
*/
static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index db1378c6ff26..c9908bcdb249 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -321,7 +321,7 @@ int hibernate_resume_nonboot_cpu_disable(void)
/*
* When bsp_check() is called in hibernate and suspend, cpu hotplug
- * is disabled already. So it's unnessary to handle race condition between
+ * is disabled already. So it's unnecessary to handle race condition between
* cpumask query and cpu hotplug.
*/
static int bsp_check(void)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 22fda7d99159..1be71ef5e4c4 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -103,7 +103,7 @@ static void __init setup_real_mode(void)
*ptr += phys_base;
}
- /* Must be perfomed *after* relocation. */
+ /* Must be performed *after* relocation. */
trampoline_header = (struct trampoline_header *)
__va(real_mode_header->trampoline_header);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index dc0a337f985b..4f18cd9eacd8 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1070,8 +1070,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_pmc = xen_read_pmc,
- .iret = xen_iret,
-
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
.load_gdt = xen_load_gdt,
@@ -1233,8 +1231,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_ops.init.patch = paravirt_patch_default;
pv_ops.cpu = xen_cpu_ops;
+ paravirt_iret = xen_iret;
xen_init_irq_ops();
/*
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index cf2ade864c30..1e28c880f642 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2410,7 +2410,7 @@ int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
rmd.prot = prot;
/*
* We use the err_ptr to indicate if there we are doing a contiguous
- * mapping or a discontigious mapping.
+ * mapping or a discontiguous mapping.
*/
rmd.contiguous = !err_ptr;
rmd.no_translate = no_translate;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 91f5b330dcc6..d9c945ee1100 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -379,11 +379,6 @@ void xen_timer_resume(void)
}
}
-static const struct pv_time_ops xen_time_ops __initconst = {
- .sched_clock = xen_sched_clock,
- .steal_clock = xen_steal_clock,
-};
-
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
static u64 xen_clock_value_saved;
@@ -525,17 +520,24 @@ static void __init xen_time_init(void)
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
-void __init xen_init_time_ops(void)
+static void __init xen_init_time_common(void)
{
xen_sched_clock_offset = xen_clocksource_read();
- pv_ops.time = xen_time_ops;
+ static_call_update(pv_steal_clock, xen_steal_clock);
+ paravirt_set_sched_clock(xen_sched_clock);
+
+ x86_platform.calibrate_tsc = xen_tsc_khz;
+ x86_platform.get_wallclock = xen_get_wallclock;
+}
+
+void __init xen_init_time_ops(void)
+{
+ xen_init_time_common();
x86_init.timers.timer_init = xen_time_init;
x86_init.timers.setup_percpu_clockev = x86_init_noop;
x86_cpuinit.setup_percpu_clockev = x86_init_noop;
- x86_platform.calibrate_tsc = xen_tsc_khz;
- x86_platform.get_wallclock = xen_get_wallclock;
/* Dom0 uses the native method to set the hardware RTC. */
if (!xen_initial_domain())
x86_platform.set_wallclock = xen_set_wallclock;
@@ -569,13 +571,11 @@ void __init xen_hvm_init_time_ops(void)
return;
}
- xen_sched_clock_offset = xen_clocksource_read();
- pv_ops.time = xen_time_ops;
+ xen_init_time_common();
+
x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
- x86_platform.calibrate_tsc = xen_tsc_khz;
- x86_platform.get_wallclock = xen_get_wallclock;
x86_platform.set_wallclock = xen_set_wallclock;
}
#endif