summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig23
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/boot/compressed/Makefile54
-rw-r--r--arch/x86/boot/compressed/cpuflags.c4
-rw-r--r--arch/x86/boot/compressed/head_32.S99
-rw-r--r--arch/x86/boot/compressed/head_64.S201
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c360
-rw-r--r--arch/x86/boot/compressed/idt_64.c54
-rw-r--r--arch/x86/boot/compressed/idt_handlers_64.S77
-rw-r--r--arch/x86/boot/compressed/kaslr.c274
-rw-r--r--arch/x86/boot/compressed/kaslr_64.c153
-rw-r--r--arch/x86/boot/compressed/misc.c7
-rw-r--r--arch/x86/boot/compressed/misc.h54
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c6
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c27
-rw-r--r--arch/x86/boot/compressed/sev-es.c214
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S50
-rw-r--r--arch/x86/boot/setup.ld2
-rw-r--r--arch/x86/boot/tools/build.c4
-rw-r--r--arch/x86/boot/tty.c8
-rw-r--r--arch/x86/boot/video.h2
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/blake2s-glue.c1
-rw-r--r--arch/x86/crypto/chacha_glue.c1
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c18
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c69
-rw-r--r--arch/x86/crypto/nhpoly1305-avx2-glue.c1
-rw-r--r--arch/x86/crypto/nhpoly1305-sse2-glue.c1
-rw-r--r--arch/x86/crypto/poly1305-x86_64-cryptogams.pl8
-rw-r--r--arch/x86/crypto/poly1305_glue.c3
-rw-r--r--arch/x86/entry/calling.h1
-rw-r--r--arch/x86/entry/common.c2
-rw-r--r--arch/x86/entry/entry_64.S91
-rw-r--r--arch/x86/entry/syscall_x32.c5
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl15
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl21
-rw-r--r--arch/x86/entry/vdso/Makefile4
-rw-r--r--arch/x86/entry/vdso/vdso32/vclock_gettime.c1
-rw-r--r--arch/x86/events/amd/ibs.c93
-rw-r--r--arch/x86/events/amd/iommu.c2
-rw-r--r--arch/x86/events/amd/uncore.c186
-rw-r--r--arch/x86/events/core.c225
-rw-r--r--arch/x86/events/intel/core.c364
-rw-r--r--arch/x86/events/intel/ds.c32
-rw-r--r--arch/x86/events/intel/uncore.c275
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/intel/uncore_snb.c45
-rw-r--r--arch/x86/events/intel/uncore_snbep.c72
-rw-r--r--arch/x86/events/msr.c1
-rw-r--r--arch/x86/events/perf_event.h54
-rw-r--r--arch/x86/events/rapl.c1
-rw-r--r--arch/x86/hyperv/hv_init.c8
-rw-r--r--arch/x86/hyperv/hv_spinlock.c2
-rw-r--r--arch/x86/ia32/ia32_aout.c1
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/apic.h12
-rw-r--r--arch/x86/include/asm/asm-prototypes.h1
-rw-r--r--arch/x86/include/asm/asm.h12
-rw-r--r--arch/x86/include/asm/cache.h2
-rw-r--r--arch/x86/include/asm/checksum.h1
-rw-r--r--arch/x86/include/asm/checksum_32.h40
-rw-r--r--arch/x86/include/asm/checksum_64.h14
-rw-r--r--arch/x86/include/asm/compat.h3
-rw-r--r--arch/x86/include/asm/copy_mc_test.h75
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h33
-rw-r--r--arch/x86/include/asm/cpufeatures.h6
-rw-r--r--arch/x86/include/asm/debugreg.h2
-rw-r--r--arch/x86/include/asm/desc.h27
-rw-r--r--arch/x86/include/asm/desc_defs.h10
-rw-r--r--arch/x86/include/asm/disabled-features.h9
-rw-r--r--arch/x86/include/asm/dma-mapping.h2
-rw-r--r--arch/x86/include/asm/extable.h9
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/fpu/api.h12
-rw-r--r--arch/x86/include/asm/fpu/internal.h38
-rw-r--r--arch/x86/include/asm/fpu/types.h11
-rw-r--r--arch/x86/include/asm/fpu/xcr.h34
-rw-r--r--arch/x86/include/asm/fpu/xstate.h2
-rw-r--r--arch/x86/include/asm/frame.h19
-rw-r--r--arch/x86/include/asm/fsgsbase.h4
-rw-r--r--arch/x86/include/asm/hw_irq.h89
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h33
-rw-r--r--arch/x86/include/asm/idtentry.h60
-rw-r--r--arch/x86/include/asm/insn-eval.h6
-rw-r--r--arch/x86/include/asm/intel-mid.h2
-rw-r--r--arch/x86/include/asm/io.h17
-rw-r--r--arch/x86/include/asm/irq_remapping.h8
-rw-r--r--arch/x86/include/asm/irq_stack.h69
-rw-r--r--arch/x86/include/asm/irqdomain.h8
-rw-r--r--arch/x86/include/asm/irqflags.h2
-rw-r--r--arch/x86/include/asm/kprobes.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h70
-rw-r--r--arch/x86/include/asm/mce.h33
-rw-r--r--arch/x86/include/asm/mcsafe_test.h75
-rw-r--r--arch/x86/include/asm/mem_encrypt.h7
-rw-r--r--arch/x86/include/asm/mpspec.h10
-rw-r--r--arch/x86/include/asm/mshyperv.h1
-rw-r--r--arch/x86/include/asm/msi.h2
-rw-r--r--arch/x86/include/asm/msr-index.h10
-rw-r--r--arch/x86/include/asm/msr.h20
-rw-r--r--arch/x86/include/asm/nospec-branch.h18
-rw-r--r--arch/x86/include/asm/numa.h10
-rw-r--r--arch/x86/include/asm/orc_types.h34
-rw-r--r--arch/x86/include/asm/page_32_types.h11
-rw-r--r--arch/x86/include/asm/page_64_types.h39
-rw-r--r--arch/x86/include/asm/paravirt.h151
-rw-r--r--arch/x86/include/asm/paravirt_types.h23
-rw-r--r--arch/x86/include/asm/pci.h11
-rw-r--r--arch/x86/include/asm/pci_x86.h11
-rw-r--r--arch/x86/include/asm/perf_event.h98
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h5
-rw-r--r--arch/x86/include/asm/pgtable.h9
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/processor.h63
-rw-r--r--arch/x86/include/asm/proto.h1
-rw-r--r--arch/x86/include/asm/realmode.h7
-rw-r--r--arch/x86/include/asm/required-features.h2
-rw-r--r--arch/x86/include/asm/segment.h6
-rw-r--r--arch/x86/include/asm/setup.h8
-rw-r--r--arch/x86/include/asm/sev-es.h114
-rw-r--r--arch/x86/include/asm/special_insns.h98
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/static_call.h40
-rw-r--r--arch/x86/include/asm/string_64.h32
-rw-r--r--arch/x86/include/asm/svm.h196
-rw-r--r--arch/x86/include/asm/sync_core.h34
-rw-r--r--arch/x86/include/asm/text-patching.h19
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/trap_pf.h24
-rw-r--r--arch/x86/include/asm/trapnr.h1
-rw-r--r--arch/x86/include/asm/traps.h22
-rw-r--r--arch/x86/include/asm/uaccess.h352
-rw-r--r--arch/x86/include/asm/uaccess_64.h20
-rw-r--r--arch/x86/include/asm/unwind_hints.h56
-rw-r--r--arch/x86/include/asm/uv/bios.h17
-rw-r--r--arch/x86/include/asm/uv/uv.h4
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h755
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h165
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h6992
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/include/asm/x86_init.h32
-rw-r--r--arch/x86/include/uapi/asm/kvm.h20
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/include/uapi/asm/svm.h13
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/alternative.c14
-rw-r--r--arch/x86/kernel/amd_gart_64.c12
-rw-r--r--arch/x86/kernel/apic/apic.c3
-rw-r--r--arch/x86/kernel/apic/io_apic.c75
-rw-r--r--arch/x86/kernel/apic/msi.c118
-rw-r--r--arch/x86/kernel/apic/probe_32.c3
-rw-r--r--arch/x86/kernel/apic/probe_64.c3
-rw-r--r--arch/x86/kernel/apic/vector.c3
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c822
-rw-r--r--arch/x86/kernel/asm-offsets.c3
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kernel/cpu/centaur.c27
-rw-r--r--arch/x86/kernel/cpu/common.c92
-rw-r--r--arch/x86/kernel/cpu/cpu.h2
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c2
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c44
-rw-r--r--arch/x86/kernel/cpu/mce/core.c188
-rw-r--r--arch/x86/kernel/cpu/mce/dev-mcelog.c4
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h13
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c102
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c15
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c60
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c92
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h49
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c16
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c87
-rw-r--r--arch/x86/kernel/cpu/scattered.c3
-rw-r--r--arch/x86/kernel/cpu/vmware.c50
-rw-r--r--arch/x86/kernel/devicetree.c4
-rw-r--r--arch/x86/kernel/dumpstack.c10
-rw-r--r--arch/x86/kernel/dumpstack_64.c46
-rw-r--r--arch/x86/kernel/e820.c16
-rw-r--r--arch/x86/kernel/fpu/init.c41
-rw-r--r--arch/x86/kernel/fpu/xstate.c63
-rw-r--r--arch/x86/kernel/head64.c124
-rw-r--r--arch/x86/kernel/head_64.S165
-rw-r--r--arch/x86/kernel/hw_breakpoint.c58
-rw-r--r--arch/x86/kernel/idt.c44
-rw-r--r--arch/x86/kernel/irq.c2
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c3
-rw-r--r--arch/x86/kernel/kgdb.c5
-rw-r--r--arch/x86/kernel/kprobes/core.c111
-rw-r--r--arch/x86/kernel/kprobes/opt.c7
-rw-r--r--arch/x86/kernel/kvm.c59
-rw-r--r--arch/x86/kernel/mpparse.c27
-rw-r--r--arch/x86/kernel/msr.c18
-rw-r--r--arch/x86/kernel/nmi.c20
-rw-r--r--arch/x86/kernel/paravirt.c19
-rw-r--r--arch/x86/kernel/paravirt_patch.c17
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/process.c3
-rw-r--r--arch/x86/kernel/process_64.c8
-rw-r--r--arch/x86/kernel/ptrace.c6
-rw-r--r--arch/x86/kernel/quirks.c10
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/setup.c62
-rw-r--r--arch/x86/kernel/sev-es-shared.c507
-rw-r--r--arch/x86/kernel/sev-es.c1404
-rw-r--r--arch/x86/kernel/signal_compat.c2
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/stacktrace.c10
-rw-r--r--arch/x86/kernel/static_call.c98
-rw-r--r--arch/x86/kernel/sys_ia32.c2
-rw-r--r--arch/x86/kernel/traps.c213
-rw-r--r--arch/x86/kernel/tsc.c10
-rw-r--r--arch/x86/kernel/umip.c89
-rw-r--r--arch/x86/kernel/unwind_orc.c20
-rw-r--r--arch/x86/kernel/vmlinux.lds.S40
-rw-r--r--arch/x86/kernel/x86_init.c26
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile6
-rw-r--r--arch/x86/kvm/cpuid.c139
-rw-r--r--arch/x86/kvm/cpuid.h15
-rw-r--r--arch/x86/kvm/emulate.c22
-rw-r--r--arch/x86/kvm/hyperv.c31
-rw-r--r--arch/x86/kvm/ioapic.c5
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h2
-rw-r--r--arch/x86/kvm/lapic.c43
-rw-r--r--arch/x86/kvm/lapic.h1
-rw-r--r--arch/x86/kvm/mmu.h5
-rw-r--r--arch/x86/kvm/mmu/mmu.c984
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h88
-rw-r--r--arch/x86/kvm/mmu/mmutrace.h21
-rw-r--r--arch/x86/kvm/mmu/page_track.c6
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h50
-rw-r--r--arch/x86/kvm/mmu/spte.c318
-rw-r--r--arch/x86/kvm/mmu/spte.h252
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.c182
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h60
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c1157
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h48
-rw-r--r--arch/x86/kvm/svm/avic.c7
-rw-r--r--arch/x86/kvm/svm/nested.c394
-rw-r--r--arch/x86/kvm/svm/sev.c29
-rw-r--r--arch/x86/kvm/svm/svm.c416
-rw-r--r--arch/x86/kvm/svm/svm.h103
-rw-r--r--arch/x86/kvm/trace.h128
-rw-r--r--arch/x86/kvm/vmx/capabilities.h10
-rw-r--r--arch/x86/kvm/vmx/evmcs.c3
-rw-r--r--arch/x86/kvm/vmx/evmcs.h3
-rw-r--r--arch/x86/kvm/vmx/nested.c180
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c332
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h99
-rw-r--r--arch/x86/kvm/vmx/vmcs.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S34
-rw-r--r--arch/x86/kvm/vmx/vmx.c1302
-rw-r--r--arch/x86/kvm/vmx/vmx.h148
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h (renamed from arch/x86/kvm/vmx/ops.h)0
-rw-r--r--arch/x86/kvm/x86.c722
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/checksum_32.S117
-rw-r--r--arch/x86/lib/copy_mc.c96
-rw-r--r--arch/x86/lib/copy_mc_64.S163
-rw-r--r--arch/x86/lib/copy_user_64.S111
-rw-r--r--arch/x86/lib/csum-copy_64.S140
-rw-r--r--arch/x86/lib/csum-wrappers_64.c86
-rw-r--r--arch/x86/lib/getuser.S107
-rw-r--r--arch/x86/lib/insn-eval.c130
-rw-r--r--arch/x86/lib/memcpy_64.S115
-rw-r--r--arch/x86/lib/putuser.S47
-rw-r--r--arch/x86/lib/usercopy_64.c23
-rw-r--r--arch/x86/mm/cpu_entry_area.c3
-rw-r--r--arch/x86/mm/extable.c25
-rw-r--r--arch/x86/mm/fault.c15
-rw-r--r--arch/x86/mm/init_64.c20
-rw-r--r--arch/x86/mm/mem_encrypt.c42
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c5
-rw-r--r--arch/x86/mm/numa.c34
-rw-r--r--arch/x86/mm/numa_emulation.c3
-rw-r--r--arch/x86/mm/pat/set_memory.c2
-rw-r--r--arch/x86/mm/tlb.c24
-rw-r--r--arch/x86/net/bpf_jit_comp.c310
-rw-r--r--arch/x86/pci/common.c18
-rw-r--r--arch/x86/pci/fixup.c2
-rw-r--r--arch/x86/pci/init.c13
-rw-r--r--arch/x86/pci/intel_mid_pci.c3
-rw-r--r--arch/x86/pci/sta2x11-fixup.c7
-rw-r--r--arch/x86/pci/xen.c137
-rw-r--r--arch/x86/platform/efi/efi.c3
-rw-r--r--arch/x86/platform/efi/efi_64.c10
-rw-r--r--arch/x86/platform/pvh/enlighten.c4
-rw-r--r--arch/x86/platform/uv/Makefile2
-rw-r--r--arch/x86/platform/uv/bios_uv.c28
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2097
-rw-r--r--arch/x86/platform/uv/uv_irq.c16
-rw-r--r--arch/x86/platform/uv/uv_nmi.c62
-rw-r--r--arch/x86/platform/uv/uv_time.c18
-rw-r--r--arch/x86/purgatory/purgatory.c4
-rw-r--r--arch/x86/realmode/init.c24
-rw-r--r--arch/x86/realmode/rm/header.S3
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S20
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk50
-rw-r--r--arch/x86/um/asm/checksum.h16
-rw-r--r--arch/x86/um/asm/checksum_32.h23
-rw-r--r--arch/x86/um/ptrace_64.c13
-rw-r--r--arch/x86/um/user-offsets.c2
-rw-r--r--arch/x86/xen/enlighten.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c19
-rw-r--r--arch/x86/xen/enlighten_pvh.c2
-rw-r--r--arch/x86/xen/grant-table.c27
-rw-r--r--arch/x86/xen/mmu_pv.c10
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c2
-rw-r--r--arch/x86/xen/smp.c19
-rw-r--r--arch/x86/xen/xen-ops.h2
313 files changed, 18035 insertions, 12605 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7101ac64bb20..f6946b81f74a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -75,7 +75,7 @@ config X86
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
- select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
+ select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
@@ -215,6 +215,8 @@ config X86
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if X86_64
+ select HAVE_STATIC_CALL
+ select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
@@ -230,6 +232,7 @@ config X86
select RTC_MC146818_LIB
select SPARSE_IRQ
select SRCU
+ select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select USER_STACKTRACE_SUPPORT
@@ -451,7 +454,6 @@ config GOLDFISH
config RETPOLINE
bool "Avoid speculative indirect branches in kernel"
default y
- select STACK_VALIDATION if HAVE_STACK_VALIDATION
help
Compile kernel with the retpoline compiler options to guard against
kernel-to-user data leaks by avoiding speculative indirect
@@ -1521,6 +1523,7 @@ config AMD_MEM_ENCRYPT
select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ select INSTRUCTION_DECODER
help
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
@@ -1968,22 +1971,6 @@ config EFI_MIXED
If unsure, say N.
-config SECCOMP
- def_bool y
- prompt "Enable seccomp to safely compute untrusted bytecode"
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y. Only embedded should say N here.
-
source "kernel/Kconfig.hz"
config KEXEC
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ee1d3c5834c6..27b5e2bc6a01 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.
-config MCSAFE_TEST
+config COPY_MC_TEST
def_bool n
config EFI_PGT_DUMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4346ffb2e39f..154259f18b8b 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -209,6 +209,10 @@ ifdef CONFIG_X86_64
LDFLAGS_vmlinux += -z max-page-size=0x200000
endif
+# We never want expected sections to be placed heuristically by the
+# linker. All sections should be explicitly named in the linker script.
+LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 3962f592633d..ee249088cbfe 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -29,10 +29,10 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst
KBUILD_CFLAGS := -m$(BITS) -O2
-KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
+KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
-cflags-$(CONFIG_X86_64) := -mcmodel=small
+cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-mmx -mno-sse
KBUILD_CFLAGS += -ffreestanding
@@ -43,24 +43,26 @@ KBUILD_CFLAGS += -Wno-pointer-sign
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += -D__DISABLE_EXPORTS
+# Disable relocation relaxation in case the link is not PIE.
+KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
+KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
+
+# sev-es.c indirectly inludes inat-table.h which is generated during
+# compilation and stored in $(objtree). Add the directory to the includes so
+# that the compiler finds it even with out-of-tree builds (make O=/some/path).
+CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
UBSAN_SANITIZE :=n
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
# Compressed kernel should be built as PIE since it may be loaded at any
# address by the bootloader.
-ifeq ($(CONFIG_X86_32),y)
-KBUILD_LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
-else
-# To build 64-bit compressed kernel as PIE, we disable relocation
-# overflow check to avoid relocation overflow error with a new linker
-# command-line option, -z noreloc-overflow.
-KBUILD_LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
- && echo "-z noreloc-overflow -pie --no-dynamic-linker")
-endif
-LDFLAGS_vmlinux := -T
+LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
+LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
@@ -84,9 +86,11 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
- vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
+ vmlinux-objs-y += $(obj)/ident_map_64.o
+ vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
+ vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o
endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
@@ -94,30 +98,8 @@ vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
-# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
-# can place it anywhere in memory and it will still run. However, since
-# it is executed as-is without any ELF relocation processing performed
-# (and has already had all relocation sections stripped from the binary),
-# none of the code can use data relocations (e.g. static assignments of
-# pointer values), since they will be meaningless at runtime. This check
-# will refuse to link the vmlinux if any of these relocations are found.
-quiet_cmd_check_data_rel = DATAREL $@
-define cmd_check_data_rel
- for obj in $(filter %.o,$^); do \
- $(READELF) -S $$obj | grep -qF .rel.local && { \
- echo "error: $$obj has data relocations!" >&2; \
- exit 1; \
- } || true; \
- done
-endef
-
-# We need to run two commands under "if_changed", so merge them into a
-# single invocation.
-quiet_cmd_check-and-link-vmlinux = LD $@
- cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
-
$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE
- $(call if_changed,check-and-link-vmlinux)
+ $(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
index 6448a8196d32..0cc1323896d1 100644
--- a/arch/x86/boot/compressed/cpuflags.c
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#ifdef CONFIG_RANDOMIZE_BASE
-
#include "../cpuflags.c"
bool has_cpuflag(int flag)
@@ -9,5 +7,3 @@ bool has_cpuflag(int flag)
return test_bit(flag, cpu.flags);
}
-
-#endif
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 03557f2174bf..659fad53ca82 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -33,32 +33,13 @@
#include <asm/bootparam.h>
/*
- * The 32-bit x86 assembler in binutils 2.26 will generate R_386_GOT32X
- * relocation to get the symbol address in PIC. When the compressed x86
- * kernel isn't built as PIC, the linker optimizes R_386_GOT32X
- * relocations to their fixed symbol addresses. However, when the
- * compressed x86 kernel is loaded at a different address, it leads
- * to the following load failure:
- *
- * Failed to allocate space for phdrs
- *
- * during the decompression stage.
- *
- * If the compressed x86 kernel is relocatable at run-time, it should be
- * compiled with -fPIE, instead of -fPIC, if possible and should be built as
- * Position Independent Executable (PIE) so that linker won't optimize
- * R_386_GOT32X relocation to its fixed symbol address. Older
- * linkers generate R_386_32 relocations against locally defined symbols,
- * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less
- * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle
- * R_386_32 relocations when relocating the kernel. To generate
- * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as
- * hidden:
+ * These symbols needed to be marked as .hidden to prevent the BFD linker from
+ * generating R_386_32 (rather than R_386_RELATIVE) relocations for them when
+ * the 32-bit compressed kernel is linked as PIE. This is no longer necessary,
+ * but it doesn't hurt to keep them .hidden.
*/
.hidden _bss
.hidden _ebss
- .hidden _got
- .hidden _egot
.hidden _end
__HEAD
@@ -77,10 +58,10 @@ SYM_FUNC_START(startup_32)
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %edx
- subl $1b, %edx
+ addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %edx
/* Load new GDT */
- leal gdt(%edx), %eax
+ leal gdt@GOTOFF(%edx), %eax
movl %eax, 2(%eax)
lgdt (%eax)
@@ -93,14 +74,16 @@ SYM_FUNC_START(startup_32)
movl %eax, %ss
/*
- * %edx contains the address we are loaded at by the boot loader and %ebx
- * contains the address where we should move the kernel image temporarily
- * for safe in-place decompression. %ebp contains the address that the kernel
- * will be decompressed to.
+ * %edx contains the address we are loaded at by the boot loader (plus the
+ * offset to the GOT). The below code calculates %ebx to be the address where
+ * we should move the kernel image temporarily for safe in-place decompression
+ * (again, plus the offset to the GOT).
+ *
+ * %ebp is calculated to be the address that the kernel will be decompressed to.
*/
#ifdef CONFIG_RELOCATABLE
- movl %edx, %ebx
+ leal startup_32@GOTOFF(%edx), %ebx
#ifdef CONFIG_EFI_STUB
/*
@@ -111,7 +94,7 @@ SYM_FUNC_START(startup_32)
* image_offset = startup_32 - image_base
* Otherwise image_offset will be zero and has no effect on the calculations.
*/
- subl image_offset(%edx), %ebx
+ subl image_offset@GOTOFF(%edx), %ebx
#endif
movl BP_kernel_alignment(%esi), %eax
@@ -128,10 +111,10 @@ SYM_FUNC_START(startup_32)
movl %ebx, %ebp // Save the output address for later
/* Target address to relocate to for decompression */
addl BP_init_size(%esi), %ebx
- subl $_end, %ebx
+ subl $_end@GOTOFF, %ebx
/* Set up the stack */
- leal boot_stack_end(%ebx), %esp
+ leal boot_stack_end@GOTOFF(%ebx), %esp
/* Zero EFLAGS */
pushl $0
@@ -142,8 +125,8 @@ SYM_FUNC_START(startup_32)
* where decompression in place becomes safe.
*/
pushl %esi
- leal (_bss-4)(%edx), %esi
- leal (_bss-4)(%ebx), %edi
+ leal (_bss@GOTOFF-4)(%edx), %esi
+ leal (_bss@GOTOFF-4)(%ebx), %edi
movl $(_bss - startup_32), %ecx
shrl $2, %ecx
std
@@ -156,14 +139,14 @@ SYM_FUNC_START(startup_32)
* during extract_kernel below. To avoid any issues, repoint the GDTR
* to the new copy of the GDT.
*/
- leal gdt(%ebx), %eax
+ leal gdt@GOTOFF(%ebx), %eax
movl %eax, 2(%eax)
lgdt (%eax)
/*
* Jump to the relocated address.
*/
- leal .Lrelocated(%ebx), %eax
+ leal .Lrelocated@GOTOFF(%ebx), %eax
jmp *%eax
SYM_FUNC_END(startup_32)
@@ -173,7 +156,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry)
add $0x4, %esp
movl 8(%esp), %esi /* save boot_params pointer */
call efi_main
- leal startup_32(%eax), %eax
+ /* efi_main returns the possibly relocated address of startup_32 */
jmp *%eax
SYM_FUNC_END(efi32_stub_entry)
SYM_FUNC_END_ALIAS(efi_stub_entry)
@@ -186,40 +169,26 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
* Clear BSS (stack is currently empty)
*/
xorl %eax, %eax
- leal _bss(%ebx), %edi
- leal _ebss(%ebx), %ecx
+ leal _bss@GOTOFF(%ebx), %edi
+ leal _ebss@GOTOFF(%ebx), %ecx
subl %edi, %ecx
shrl $2, %ecx
rep stosl
/*
- * Adjust our own GOT
- */
- leal _got(%ebx), %edx
- leal _egot(%ebx), %ecx
-1:
- cmpl %ecx, %edx
- jae 2f
- addl %ebx, (%edx)
- addl $4, %edx
- jmp 1b
-2:
-
-/*
* Do the extraction, and jump to the new kernel..
*/
- /* push arguments for extract_kernel: */
- pushl $z_output_len /* decompressed length, end of relocs */
-
- pushl %ebp /* output address */
-
- pushl $z_input_len /* input_len */
- leal input_data(%ebx), %eax
- pushl %eax /* input_data */
- leal boot_heap(%ebx), %eax
- pushl %eax /* heap area */
- pushl %esi /* real mode pointer */
- call extract_kernel /* returns kernel location in %eax */
+ /* push arguments for extract_kernel: */
+
+ pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
+ pushl %ebp /* output address */
+ pushl input_len@GOTOFF(%ebx) /* input_len */
+ leal input_data@GOTOFF(%ebx), %eax
+ pushl %eax /* input_data */
+ leal boot_heap@GOTOFF(%ebx), %eax
+ pushl %eax /* heap area */
+ pushl %esi /* real mode pointer */
+ call extract_kernel /* returns kernel location in %eax */
addl $24, %esp
/*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 97d37f0a34f5..017de6cc87dc 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
#include "pgtable.h"
/*
@@ -40,11 +41,35 @@
*/
.hidden _bss
.hidden _ebss
- .hidden _got
- .hidden _egot
.hidden _end
__HEAD
+
+/*
+ * This macro gives the relative virtual address of X, i.e. the offset of X
+ * from startup_32. This is the same as the link-time virtual address of X,
+ * since startup_32 is at 0, but defining it this way tells the
+ * assembler/linker that we do not want the actual run-time address of X. This
+ * prevents the linker from trying to create unwanted run-time relocation
+ * entries for the reference when the compressed kernel is linked as PIE.
+ *
+ * A reference X(%reg) will result in the link-time VA of X being stored with
+ * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
+ * adds the 64-bit base address where the kernel is loaded.
+ *
+ * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
+ * and no run-time relocation.
+ *
+ * The macro should be used as a displacement with a base register containing
+ * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
+ * [$ rva(X)].
+ *
+ * This macro can only be used from within the .head.text section, since the
+ * expression requires startup_32 to be in the same section as the code being
+ * assembled.
+ */
+#define rva(X) ((X) - startup_32)
+
.code32
SYM_FUNC_START(startup_32)
/*
@@ -67,10 +92,10 @@ SYM_FUNC_START(startup_32)
leal (BP_scratch+4)(%esi), %esp
call 1f
1: popl %ebp
- subl $1b, %ebp
+ subl $ rva(1b), %ebp
/* Load new GDT with the 64bit segments using 32bit descriptor */
- leal gdt(%ebp), %eax
+ leal rva(gdt)(%ebp), %eax
movl %eax, 2(%eax)
lgdt (%eax)
@@ -83,7 +108,7 @@ SYM_FUNC_START(startup_32)
movl %eax, %ss
/* setup a stack and make sure cpu supports long mode. */
- leal boot_stack_end(%ebp), %esp
+ leal rva(boot_stack_end)(%ebp), %esp
call verify_cpu
testl %eax, %eax
@@ -110,7 +135,7 @@ SYM_FUNC_START(startup_32)
* image_offset = startup_32 - image_base
* Otherwise image_offset will be zero and has no effect on the calculations.
*/
- subl image_offset(%ebp), %ebx
+ subl rva(image_offset)(%ebp), %ebx
#endif
movl BP_kernel_alignment(%esi), %eax
@@ -126,7 +151,7 @@ SYM_FUNC_START(startup_32)
/* Target address to relocate to for decompression */
addl BP_init_size(%esi), %ebx
- subl $_end, %ebx
+ subl $ rva(_end), %ebx
/*
* Prepare for entering 64 bit mode
@@ -154,19 +179,19 @@ SYM_FUNC_START(startup_32)
1:
/* Initialize Page tables to 0 */
- leal pgtable(%ebx), %edi
+ leal rva(pgtable)(%ebx), %edi
xorl %eax, %eax
movl $(BOOT_INIT_PGT_SIZE/4), %ecx
rep stosl
/* Build Level 4 */
- leal pgtable + 0(%ebx), %edi
+ leal rva(pgtable + 0)(%ebx), %edi
leal 0x1007 (%edi), %eax
movl %eax, 0(%edi)
addl %edx, 4(%edi)
/* Build Level 3 */
- leal pgtable + 0x1000(%ebx), %edi
+ leal rva(pgtable + 0x1000)(%ebx), %edi
leal 0x1007(%edi), %eax
movl $4, %ecx
1: movl %eax, 0x00(%edi)
@@ -177,7 +202,7 @@ SYM_FUNC_START(startup_32)
jnz 1b
/* Build Level 2 */
- leal pgtable + 0x2000(%ebx), %edi
+ leal rva(pgtable + 0x2000)(%ebx), %edi
movl $0x00000183, %eax
movl $2048, %ecx
1: movl %eax, 0(%edi)
@@ -188,7 +213,7 @@ SYM_FUNC_START(startup_32)
jnz 1b
/* Enable the boot page tables */
- leal pgtable(%ebx), %eax
+ leal rva(pgtable)(%ebx), %eax
movl %eax, %cr3
/* Enable Long mode in EFER (Extended Feature Enable Register) */
@@ -213,14 +238,14 @@ SYM_FUNC_START(startup_32)
* We place all of the values on our mini stack so lret can
* used to perform that far jump.
*/
- leal startup_64(%ebp), %eax
+ leal rva(startup_64)(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
- movl efi32_boot_args(%ebp), %edi
+ movl rva(efi32_boot_args)(%ebp), %edi
cmp $0, %edi
jz 1f
- leal efi64_stub_entry(%ebp), %eax
- movl efi32_boot_args+4(%ebp), %esi
- movl efi32_boot_args+8(%ebp), %edx // saved bootparams pointer
+ leal rva(efi64_stub_entry)(%ebp), %eax
+ movl rva(efi32_boot_args+4)(%ebp), %esi
+ movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer
cmpl $0, %edx
jnz 1f
/*
@@ -231,7 +256,7 @@ SYM_FUNC_START(startup_32)
* the correct stack alignment for entry.
*/
subl $40, %esp
- leal efi_pe_entry(%ebp), %eax
+ leal rva(efi_pe_entry)(%ebp), %eax
movl %edi, %ecx // MS calling convention
movl %esi, %edx
1:
@@ -257,18 +282,18 @@ SYM_FUNC_START(efi32_stub_entry)
call 1f
1: pop %ebp
- subl $1b, %ebp
+ subl $ rva(1b), %ebp
- movl %esi, efi32_boot_args+8(%ebp)
+ movl %esi, rva(efi32_boot_args+8)(%ebp)
SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
- movl %ecx, efi32_boot_args(%ebp)
- movl %edx, efi32_boot_args+4(%ebp)
- movb $0, efi_is64(%ebp)
+ movl %ecx, rva(efi32_boot_args)(%ebp)
+ movl %edx, rva(efi32_boot_args+4)(%ebp)
+ movb $0, rva(efi_is64)(%ebp)
/* Save firmware GDTR and code/data selectors */
- sgdtl efi32_boot_gdt(%ebp)
- movw %cs, efi32_boot_cs(%ebp)
- movw %ds, efi32_boot_ds(%ebp)
+ sgdtl rva(efi32_boot_gdt)(%ebp)
+ movw %cs, rva(efi32_boot_cs)(%ebp)
+ movw %ds, rva(efi32_boot_ds)(%ebp)
/* Disable paging */
movl %cr0, %eax
@@ -347,30 +372,11 @@ SYM_CODE_START(startup_64)
/* Target address to relocate to for decompression */
movl BP_init_size(%rsi), %ebx
- subl $_end, %ebx
+ subl $ rva(_end), %ebx
addq %rbp, %rbx
/* Set up the stack */
- leaq boot_stack_end(%rbx), %rsp
-
- /*
- * paging_prepare() and cleanup_trampoline() below can have GOT
- * references. Adjust the table with address we are running at.
- *
- * Zero RAX for adjust_got: the GOT was not adjusted before;
- * there's no adjustment to undo.
- */
- xorq %rax, %rax
-
- /*
- * Calculate the address the binary is loaded at and use it as
- * a GOT adjustment.
- */
- call 1f
-1: popq %rdi
- subq $1b, %rdi
-
- call .Ladjust_got
+ leaq rva(boot_stack_end)(%rbx), %rsp
/*
* At this point we are in long mode with 4-level paging enabled,
@@ -410,6 +416,10 @@ SYM_CODE_START(startup_64)
.Lon_kernel_cs:
+ pushq %rsi
+ call load_stage1_idt
+ popq %rsi
+
/*
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
@@ -444,7 +454,7 @@ SYM_CODE_START(startup_64)
lretq
trampoline_return:
/* Restore the stack, the 32-bit trampoline uses its own stack */
- leaq boot_stack_end(%rbx), %rsp
+ leaq rva(boot_stack_end)(%rbx), %rsp
/*
* cleanup_trampoline() would restore trampoline memory.
@@ -456,7 +466,7 @@ trampoline_return:
* this function call.
*/
pushq %rsi
- leaq top_pgtable(%rbx), %rdi
+ leaq rva(top_pgtable)(%rbx), %rdi
call cleanup_trampoline
popq %rsi
@@ -464,30 +474,15 @@ trampoline_return:
pushq $0
popfq
- /*
- * Previously we've adjusted the GOT with address the binary was
- * loaded at. Now we need to re-adjust for relocation address.
- *
- * Calculate the address the binary is loaded at, so that we can
- * undo the previous GOT adjustment.
- */
- call 1f
-1: popq %rax
- subq $1b, %rax
-
- /* The new adjustment is the relocation address */
- movq %rbx, %rdi
- call .Ladjust_got
-
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
*/
pushq %rsi
leaq (_bss-8)(%rip), %rsi
- leaq (_bss-8)(%rbx), %rdi
- movq $_bss /* - $startup_32 */, %rcx
- shrq $3, %rcx
+ leaq rva(_bss-8)(%rbx), %rdi
+ movl $(_bss - startup_32), %ecx
+ shrl $3, %ecx
std
rep movsq
cld
@@ -498,15 +493,15 @@ trampoline_return:
* during extract_kernel below. To avoid any issues, repoint the GDTR
* to the new copy of the GDT.
*/
- leaq gdt64(%rbx), %rax
- leaq gdt(%rbx), %rdx
+ leaq rva(gdt64)(%rbx), %rax
+ leaq rva(gdt)(%rbx), %rdx
movq %rdx, 2(%rax)
lgdt (%rax)
/*
* Jump to the relocated address.
*/
- leaq .Lrelocated(%rbx), %rax
+ leaq rva(.Lrelocated)(%rbx), %rax
jmp *%rax
SYM_CODE_END(startup_64)
@@ -518,7 +513,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry)
movq %rdx, %rbx /* save boot_params pointer */
call efi_main
movq %rbx,%rsi
- leaq startup_64(%rax), %rax
+ leaq rva(startup_64)(%rax), %rax
jmp *%rax
SYM_FUNC_END(efi64_stub_entry)
SYM_FUNC_END_ALIAS(efi_stub_entry)
@@ -538,15 +533,33 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
rep stosq
/*
+ * If running as an SEV guest, the encryption mask is required in the
+ * page-table setup code below. When the guest also has SEV-ES enabled
+ * set_sev_encryption_mask() will cause #VC exceptions, but the stage2
+ * handler can't map its GHCB because the page-table is not set up yet.
+ * So set up the encryption mask here while still on the stage1 #VC
+ * handler. Then load stage2 IDT and switch to the kernel's own
+ * page-table.
+ */
+ pushq %rsi
+ call set_sev_encryption_mask
+ call load_stage2_idt
+
+ /* Pass boot_params to initialize_identity_maps() */
+ movq (%rsp), %rdi
+ call initialize_identity_maps
+ popq %rsi
+
+/*
* Do the extraction, and jump to the new kernel..
*/
pushq %rsi /* Save the real mode argument */
movq %rsi, %rdi /* real mode address */
leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
leaq input_data(%rip), %rdx /* input_data */
- movl $z_input_len, %ecx /* input_len */
+ movl input_len(%rip), %ecx /* input_len */
movq %rbp, %r8 /* output target address */
- movl $z_output_len, %r9d /* decompressed length, end of relocs */
+ movl output_len(%rip), %r9d /* decompressed length, end of relocs */
call extract_kernel /* returns kernel location in %rax */
popq %rsi
@@ -556,27 +569,6 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
jmp *%rax
SYM_FUNC_END(.Lrelocated)
-/*
- * Adjust the global offset table
- *
- * RAX is the previous adjustment of the table to undo (use 0 if it's the
- * first time we touch GOT).
- * RDI is the new adjustment to apply.
- */
-.Ladjust_got:
- /* Walk through the GOT adding the address to the entries */
- leaq _got(%rip), %rdx
- leaq _egot(%rip), %rcx
-1:
- cmpq %rcx, %rdx
- jae 2f
- subq %rax, (%rdx) /* Undo previous adjustment */
- addq %rdi, (%rdx) /* Apply the new adjustment */
- addq $8, %rdx
- jmp 1b
-2:
- ret
-
.code32
/*
* This is the 32-bit trampoline that will be copied over to low memory.
@@ -690,10 +682,21 @@ SYM_DATA_START_LOCAL(gdt)
.quad 0x0000000000000000 /* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+SYM_DATA_START(boot_idt_desc)
+ .word boot_idt_end - boot_idt - 1
+ .quad 0
+SYM_DATA_END(boot_idt_desc)
+ .balign 8
+SYM_DATA_START(boot_idt)
+ .rept BOOT_IDT_ENTRIES
+ .quad 0
+ .quad 0
+ .endr
+SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
+
#ifdef CONFIG_EFI_STUB
SYM_DATA(image_offset, .long 0)
#endif
-
#ifdef CONFIG_EFI_MIXED
SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
SYM_DATA(efi_is64, .byte 1)
@@ -702,7 +705,7 @@ SYM_DATA(efi_is64, .byte 1)
#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
- .text
+ __HEAD
.code32
SYM_FUNC_START(efi32_pe_entry)
/*
@@ -724,12 +727,12 @@ SYM_FUNC_START(efi32_pe_entry)
call 1f
1: pop %ebx
- subl $1b, %ebx
+ subl $ rva(1b), %ebx
/* Get the loaded image protocol pointer from the image handle */
leal -4(%ebp), %eax
pushl %eax // &loaded_image
- leal loaded_image_proto(%ebx), %eax
+ leal rva(loaded_image_proto)(%ebx), %eax
pushl %eax // pass the GUID address
pushl 8(%ebp) // pass the image handle
@@ -764,7 +767,7 @@ SYM_FUNC_START(efi32_pe_entry)
* use it before we get to the 64-bit efi_pe_entry() in C code.
*/
subl %esi, %ebx
- movl %ebx, image_offset(%ebp) // save image_offset
+ movl %ebx, rva(image_offset)(%ebp) // save image_offset
jmp efi32_pe_stub_entry
2: popl %edi // restore callee-save registers
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
new file mode 100644
index 000000000000..a5e5db6ada3c
--- /dev/null
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code is used on x86_64 to create page table identity mappings on
+ * demand by building up a new set of page tables (or appending to the
+ * existing ones), and then switching over to them when ready.
+ *
+ * Copyright (C) 2015-2016 Yinghai Lu
+ * Copyright (C) 2016 Kees Cook
+ */
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
+#include "error.h"
+#include "misc.h"
+
+/* These actually do the work of building the kernel identity maps. */
+#include <linux/pgtable.h>
+#include <asm/cmpxchg.h>
+#include <asm/trap_pf.h>
+#include <asm/trapnr.h>
+#include <asm/init.h>
+/* Use the static base for this part of the boot process */
+#undef __PAGE_OFFSET
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE
+#include "../../mm/ident_map.c"
+
+#define _SETUP
+#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
+#undef _SETUP
+
+extern unsigned long get_cmd_line_ptr(void);
+
+/* Used by PAGE_KERN* macros: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
+
+/* Used to track our page table allocation area. */
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+/*
+ * Allocates space for a page table entry, using struct alloc_pgt_data
+ * above. Besides the local callers, this is used as the allocation
+ * callback in mapping_info below.
+ */
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
+ unsigned char *entry;
+
+ /* Validate there is space available for a new page. */
+ if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
+ debug_putaddr(pages->pgt_buf_offset);
+ debug_putaddr(pages->pgt_buf_size);
+ return NULL;
+ }
+
+ entry = pages->pgt_buf + pages->pgt_buf_offset;
+ pages->pgt_buf_offset += PAGE_SIZE;
+
+ return entry;
+}
+
+/* Used to track our allocated page tables. */
+static struct alloc_pgt_data pgt_data;
+
+/* The top level page table entry pointer. */
+static unsigned long top_level_pgt;
+
+phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
+
+/*
+ * Mapping information structure passed to kernel_ident_mapping_init().
+ * Due to relocation, pointers must be assigned at run time not build time.
+ */
+static struct x86_mapping_info mapping_info;
+
+/*
+ * Adds the specified range to the identity mappings.
+ */
+static void add_identity_map(unsigned long start, unsigned long end)
+{
+ int ret;
+
+ /* Align boundary to 2M. */
+ start = round_down(start, PMD_SIZE);
+ end = round_up(end, PMD_SIZE);
+ if (start >= end)
+ return;
+
+ /* Build the mapping. */
+ ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end);
+ if (ret)
+ error("Error: kernel_ident_mapping_init() failed\n");
+}
+
+/* Locates and clears a region for a new top level page table. */
+void initialize_identity_maps(void *rmode)
+{
+ unsigned long cmdline;
+
+ /* Exclude the encryption mask from __PHYSICAL_MASK */
+ physical_mask &= ~sme_me_mask;
+
+ /* Init mapping_info with run-time function/buffer pointers. */
+ mapping_info.alloc_pgt_page = alloc_pgt_page;
+ mapping_info.context = &pgt_data;
+ mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
+ mapping_info.kernpg_flag = _KERNPG_TABLE;
+
+ /*
+ * It should be impossible for this not to already be true,
+ * but since calling this a second time would rewind the other
+ * counters, let's just make sure this is reset too.
+ */
+ pgt_data.pgt_buf_offset = 0;
+
+ /*
+ * If we came here via startup_32(), cr3 will be _pgtable already
+ * and we must append to the existing area instead of entirely
+ * overwriting it.
+ *
+ * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+ * the top-level page table is allocated separately.
+ *
+ * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+ * cases. On 4-level paging it's equal to 'top_level_pgt'.
+ */
+ top_level_pgt = read_cr3_pa();
+ if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
+ pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ } else {
+ pgt_data.pgt_buf = _pgtable;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
+ }
+
+ /*
+ * New page-table is set up - map the kernel image, boot_params and the
+ * command line. The uncompressed kernel requires boot_params and the
+ * command line to be mapped in the identity mapping. Map them
+ * explicitly here in case the compressed kernel does not touch them,
+ * or does not touch all the pages covering them.
+ */
+ add_identity_map((unsigned long)_head, (unsigned long)_end);
+ boot_params = rmode;
+ add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
+ cmdline = get_cmd_line_ptr();
+ add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+
+ /* Load the new page-table. */
+ write_cr3(top_level_pgt);
+}
+
+/*
+ * This switches the page tables to the new level4 that has been built
+ * via calls to add_identity_map() above. If booted via startup_32(),
+ * this is effectively a no-op.
+ */
+void finalize_identity_maps(void)
+{
+ write_cr3(top_level_pgt);
+}
+
+static pte_t *split_large_pmd(struct x86_mapping_info *info,
+ pmd_t *pmdp, unsigned long __address)
+{
+ unsigned long page_flags;
+ unsigned long address;
+ pte_t *pte;
+ pmd_t pmd;
+ int i;
+
+ pte = (pte_t *)info->alloc_pgt_page(info->context);
+ if (!pte)
+ return NULL;
+
+ address = __address & PMD_MASK;
+ /* No large page - clear PSE flag */
+ page_flags = info->page_flag & ~_PAGE_PSE;
+
+ /* Populate the PTEs */
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ set_pte(&pte[i], __pte(address | page_flags));
+ address += PAGE_SIZE;
+ }
+
+ /*
+ * Ideally we need to clear the large PMD first and do a TLB
+ * flush before we write the new PMD. But the 2M range of the
+ * PMD might contain the code we execute and/or the stack
+ * we are on, so we can't do that. But that should be safe here
+ * because we are going from large to small mappings and we are
+ * also the only user of the page-table, so there is no chance
+ * of a TLB multihit.
+ */
+ pmd = __pmd((unsigned long)pte | info->kernpg_flag);
+ set_pmd(pmdp, pmd);
+ /* Flush TLB to establish the new PMD */
+ write_cr3(top_level_pgt);
+
+ return pte + pte_index(__address);
+}
+
+static void clflush_page(unsigned long address)
+{
+ unsigned int flush_size;
+ char *cl, *start, *end;
+
+ /*
+ * Hardcode cl-size to 64 - CPUID can't be used here because that might
+ * cause another #VC exception and the GHCB is not ready to use yet.
+ */
+ flush_size = 64;
+ start = (char *)(address & PAGE_MASK);
+ end = start + PAGE_SIZE;
+
+ /*
+ * First make sure there are no pending writes on the cache-lines to
+ * flush.
+ */
+ asm volatile("mfence" : : : "memory");
+
+ for (cl = start; cl != end; cl += flush_size)
+ clflush(cl);
+}
+
+static int set_clr_page_flags(struct x86_mapping_info *info,
+ unsigned long address,
+ pteval_t set, pteval_t clr)
+{
+ pgd_t *pgdp = (pgd_t *)top_level_pgt;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep, pte;
+
+ /*
+ * First make sure there is a PMD mapping for 'address'.
+ * It should already exist, but keep things generic.
+ *
+ * To map the page just read from it and fault it in if there is no
+ * mapping yet. add_identity_map() can't be called here because that
+ * would unconditionally map the address on PMD level, destroying any
+ * PTE-level mappings that might already exist. Use assembly here so
+ * the access won't be optimized away.
+ */
+ asm volatile("mov %[address], %%r9"
+ :: [address] "g" (*(unsigned long *)address)
+ : "r9", "memory");
+
+ /*
+ * The page is mapped at least with PMD size - so skip checks and walk
+ * directly to the PMD.
+ */
+ p4dp = p4d_offset(pgdp, address);
+ pudp = pud_offset(p4dp, address);
+ pmdp = pmd_offset(pudp, address);
+
+ if (pmd_large(*pmdp))
+ ptep = split_large_pmd(info, pmdp, address);
+ else
+ ptep = pte_offset_kernel(pmdp, address);
+
+ if (!ptep)
+ return -ENOMEM;
+
+ /*
+ * Changing encryption attributes of a page requires to flush it from
+ * the caches.
+ */
+ if ((set | clr) & _PAGE_ENC)
+ clflush_page(address);
+
+ /* Update PTE */
+ pte = *ptep;
+ pte = pte_set_flags(pte, set);
+ pte = pte_clear_flags(pte, clr);
+ set_pte(ptep, pte);
+
+ /* Flush TLB after changing encryption attribute */
+ write_cr3(top_level_pgt);
+
+ return 0;
+}
+
+int set_page_decrypted(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC);
+}
+
+int set_page_encrypted(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0);
+}
+
+int set_page_non_present(unsigned long address)
+{
+ return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT);
+}
+
+static void do_pf_error(const char *msg, unsigned long error_code,
+ unsigned long address, unsigned long ip)
+{
+ error_putstr(msg);
+
+ error_putstr("\nError Code: ");
+ error_puthex(error_code);
+ error_putstr("\nCR2: 0x");
+ error_puthex(address);
+ error_putstr("\nRIP relative to _head: 0x");
+ error_puthex(ip - (unsigned long)_head);
+ error_putstr("\n");
+
+ error("Stopping.\n");
+}
+
+void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ unsigned long address = native_read_cr2();
+ unsigned long end;
+ bool ghcb_fault;
+
+ ghcb_fault = sev_es_check_ghcb_fault(address);
+
+ address &= PMD_MASK;
+ end = address + PMD_SIZE;
+
+ /*
+ * Check for unexpected error codes. Unexpected are:
+ * - Faults on present pages
+ * - User faults
+ * - Reserved bits set
+ */
+ if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD))
+ do_pf_error("Unexpected page-fault:", error_code, address, regs->ip);
+ else if (ghcb_fault)
+ do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip);
+
+ /*
+ * Error code is sane - now identity map the 2M region around
+ * the faulting address.
+ */
+ add_identity_map(address, end);
+}
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
new file mode 100644
index 000000000000..804a502ee0d2
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/trap_pf.h>
+#include <asm/segment.h>
+#include <asm/trapnr.h>
+#include "misc.h"
+
+static void set_idt_entry(int vector, void (*handler)(void))
+{
+ unsigned long address = (unsigned long)handler;
+ gate_desc entry;
+
+ memset(&entry, 0, sizeof(entry));
+
+ entry.offset_low = (u16)(address & 0xffff);
+ entry.segment = __KERNEL_CS;
+ entry.bits.type = GATE_TRAP;
+ entry.bits.p = 1;
+ entry.offset_middle = (u16)((address >> 16) & 0xffff);
+ entry.offset_high = (u32)(address >> 32);
+
+ memcpy(&boot_idt[vector], &entry, sizeof(entry));
+}
+
+/* Have this here so we don't need to include <asm/desc.h> */
+static void load_boot_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+/* Setup IDT before kernel jumping to .Lrelocated */
+void load_stage1_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ set_idt_entry(X86_TRAP_VC, boot_stage1_vc);
+
+ load_boot_idt(&boot_idt_desc);
+}
+
+/* Setup IDT after kernel jumping to .Lrelocated */
+void load_stage2_idt(void)
+{
+ boot_idt_desc.address = (unsigned long)boot_idt;
+
+ set_idt_entry(X86_TRAP_PF, boot_page_fault);
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+#endif
+
+ load_boot_idt(&boot_idt_desc);
+}
diff --git a/arch/x86/boot/compressed/idt_handlers_64.S b/arch/x86/boot/compressed/idt_handlers_64.S
new file mode 100644
index 000000000000..22890e199f5b
--- /dev/null
+++ b/arch/x86/boot/compressed/idt_handlers_64.S
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Early IDT handler entry points
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#include <asm/segment.h>
+
+/* For ORIG_RAX */
+#include "../../entry/calling.h"
+
+.macro EXCEPTION_HANDLER name function error_code=0
+SYM_FUNC_START(\name)
+
+ /* Build pt_regs */
+ .if \error_code == 0
+ pushq $0
+ .endif
+
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %rax
+ pushq %r8
+ pushq %r9
+ pushq %r10
+ pushq %r11
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+ /* Call handler with pt_regs */
+ movq %rsp, %rdi
+ /* Error code is second parameter */
+ movq ORIG_RAX(%rsp), %rsi
+ call \function
+
+ /* Restore regs */
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+
+ /* Remove error code and return */
+ addq $8, %rsp
+
+ iretq
+SYM_FUNC_END(\name)
+ .endm
+
+ .text
+ .code64
+
+EXCEPTION_HANDLER boot_page_fault do_boot_page_fault error_code=1
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+EXCEPTION_HANDLER boot_stage1_vc do_vc_no_ghcb error_code=1
+EXCEPTION_HANDLER boot_stage2_vc do_boot_stage2_vc error_code=1
+#endif
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index dde7cb3724df..b92fffbe761f 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -36,17 +36,12 @@
#define STATIC
#include <linux/decompress/mm.h>
-#ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled;
-unsigned int pgdir_shift __ro_after_init = 39;
-unsigned int ptrs_per_p4d __ro_after_init = 1;
-#endif
+#define _SETUP
+#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
+#undef _SETUP
extern unsigned long get_cmd_line_ptr(void);
-/* Used by PAGE_KERN* macros: */
-pteval_t __default_kernel_pte_mask __read_mostly = ~0;
-
/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
@@ -87,8 +82,11 @@ static unsigned long get_boot_seed(void)
static bool memmap_too_large;
-/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
-static unsigned long long mem_limit = ULLONG_MAX;
+/*
+ * Store memory limit: MAXMEM on 64-bit and KERNEL_IMAGE_SIZE on 32-bit.
+ * It may be reduced by "mem=nn[KMG]" or "memmap=nn[KMG]" command line options.
+ */
+static u64 mem_limit;
/* Number of immovable memory regions */
static int num_immovable_mem;
@@ -131,8 +129,7 @@ enum parse_mode {
};
static int
-parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
- enum parse_mode mode)
+parse_memmap(char *p, u64 *start, u64 *size, enum parse_mode mode)
{
char *oldp;
@@ -162,7 +159,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
*/
*size = 0;
} else {
- unsigned long long flags;
+ u64 flags;
/*
* efi_fake_mem=nn@ss:attr the attr specifies
@@ -201,7 +198,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str)
while (str && (i < MAX_MEMMAP_REGIONS)) {
int rc;
- unsigned long long start, size;
+ u64 start, size;
char *k = strchr(str, ',');
if (k)
@@ -214,7 +211,7 @@ static void mem_avoid_memmap(enum parse_mode mode, char *str)
if (start == 0) {
/* Store the specified memory limit if size > 0 */
- if (size > 0)
+ if (size > 0 && size < mem_limit)
mem_limit = size;
continue;
@@ -261,15 +258,15 @@ static void parse_gb_huge_pages(char *param, char *val)
static void handle_mem_options(void)
{
char *args = (char *)get_cmd_line_ptr();
- size_t len = strlen((char *)args);
+ size_t len;
char *tmp_cmdline;
char *param, *val;
u64 mem_size;
- if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
- !strstr(args, "hugepages"))
+ if (!args)
return;
+ len = strnlen(args, COMMAND_LINE_SIZE-1);
tmp_cmdline = malloc(len + 1);
if (!tmp_cmdline)
error("Failed to allocate space for tmp_cmdline");
@@ -284,14 +281,12 @@ static void handle_mem_options(void)
while (*args) {
args = next_arg(args, &param, &val);
/* Stop at -- */
- if (!val && strcmp(param, "--") == 0) {
- warn("Only '--' specified in cmdline");
- goto out;
- }
+ if (!val && strcmp(param, "--") == 0)
+ break;
if (!strcmp(param, "memmap")) {
mem_avoid_memmap(PARSE_MEMMAP, val);
- } else if (strstr(param, "hugepages")) {
+ } else if (IS_ENABLED(CONFIG_X86_64) && strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
char *p = val;
@@ -300,21 +295,23 @@ static void handle_mem_options(void)
continue;
mem_size = memparse(p, &p);
if (mem_size == 0)
- goto out;
+ break;
- mem_limit = mem_size;
+ if (mem_size < mem_limit)
+ mem_limit = mem_size;
} else if (!strcmp(param, "efi_fake_mem")) {
mem_avoid_memmap(PARSE_EFI, val);
}
}
-out:
free(tmp_cmdline);
return;
}
/*
- * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
+ * In theory, KASLR can put the kernel anywhere in the range of [16M, MAXMEM)
+ * on 64-bit, and [16M, KERNEL_IMAGE_SIZE) on 32-bit.
+ *
* The mem_avoid array is used to store the ranges that need to be avoided
* when KASLR searches for an appropriate random address. We must avoid any
* regions that are unsafe to overlap with during decompression, and other
@@ -392,8 +389,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
{
unsigned long init_size = boot_params->hdr.init_size;
u64 initrd_start, initrd_size;
- u64 cmd_line, cmd_line_size;
- char *ptr;
+ unsigned long cmd_line, cmd_line_size;
/*
* Avoid the region that is unsafe to overlap during
@@ -401,8 +397,6 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
*/
mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
- add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
- mem_avoid[MEM_AVOID_ZO_RANGE].size);
/* Avoid initrd. */
initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
@@ -414,22 +408,17 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* No need to set mapping for initrd, it will be handled in VO. */
/* Avoid kernel command line. */
- cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32;
- cmd_line |= boot_params->hdr.cmd_line_ptr;
+ cmd_line = get_cmd_line_ptr();
/* Calculate size of cmd_line. */
- ptr = (char *)(unsigned long)cmd_line;
- for (cmd_line_size = 0; ptr[cmd_line_size++];)
- ;
- mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
- mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
- add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
- mem_avoid[MEM_AVOID_CMDLINE].size);
+ if (cmd_line) {
+ cmd_line_size = strnlen((char *)cmd_line, COMMAND_LINE_SIZE-1) + 1;
+ mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
+ mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
+ }
/* Avoid boot parameters. */
mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
- add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
- mem_avoid[MEM_AVOID_BOOTPARAMS].size);
/* We don't need to set a mapping for setup_data. */
@@ -438,11 +427,6 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* Enumerate the immovable memory regions */
num_immovable_mem = count_immovable_mem_regions();
-
-#ifdef CONFIG_X86_VERBOSE_BOOTUP
- /* Make sure video RAM can be used. */
- add_identity_map(0, PMD_SIZE);
-#endif
}
/*
@@ -454,7 +438,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
{
int i;
struct setup_data *ptr;
- unsigned long earliest = img->start + img->size;
+ u64 earliest = img->start + img->size;
bool is_overlapping = false;
for (i = 0; i < MEM_AVOID_MAX; i++) {
@@ -499,18 +483,16 @@ static bool mem_avoid_overlap(struct mem_vector *img,
}
struct slot_area {
- unsigned long addr;
- int num;
+ u64 addr;
+ unsigned long num;
};
#define MAX_SLOT_AREA 100
static struct slot_area slot_areas[MAX_SLOT_AREA];
-
+static unsigned int slot_area_index;
static unsigned long slot_max;
-static unsigned long slot_area_index;
-
static void store_slot_info(struct mem_vector *region, unsigned long image_size)
{
struct slot_area slot_area;
@@ -519,13 +501,10 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
return;
slot_area.addr = region->start;
- slot_area.num = (region->size - image_size) /
- CONFIG_PHYSICAL_ALIGN + 1;
+ slot_area.num = 1 + (region->size - image_size) / CONFIG_PHYSICAL_ALIGN;
- if (slot_area.num > 0) {
- slot_areas[slot_area_index++] = slot_area;
- slot_max += slot_area.num;
- }
+ slot_areas[slot_area_index++] = slot_area;
+ slot_max += slot_area.num;
}
/*
@@ -535,57 +514,53 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
static void
process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
{
- unsigned long addr, size = 0;
+ u64 pud_start, pud_end;
+ unsigned long gb_huge_pages;
struct mem_vector tmp;
- int i = 0;
- if (!max_gb_huge_pages) {
+ if (!IS_ENABLED(CONFIG_X86_64) || !max_gb_huge_pages) {
store_slot_info(region, image_size);
return;
}
- addr = ALIGN(region->start, PUD_SIZE);
- /* Did we raise the address above the passed in memory entry? */
- if (addr < region->start + region->size)
- size = region->size - (addr - region->start);
-
- /* Check how many 1GB huge pages can be filtered out: */
- while (size > PUD_SIZE && max_gb_huge_pages) {
- size -= PUD_SIZE;
- max_gb_huge_pages--;
- i++;
- }
+ /* Are there any 1GB pages in the region? */
+ pud_start = ALIGN(region->start, PUD_SIZE);
+ pud_end = ALIGN_DOWN(region->start + region->size, PUD_SIZE);
/* No good 1GB huge pages found: */
- if (!i) {
+ if (pud_start >= pud_end) {
store_slot_info(region, image_size);
return;
}
- /*
- * Skip those 'i'*1GB good huge pages, and continue checking and
- * processing the remaining head or tail part of the passed region
- * if available.
- */
-
- if (addr >= region->start + image_size) {
+ /* Check if the head part of the region is usable. */
+ if (pud_start >= region->start + image_size) {
tmp.start = region->start;
- tmp.size = addr - region->start;
+ tmp.size = pud_start - region->start;
store_slot_info(&tmp, image_size);
}
- size = region->size - (addr - region->start) - i * PUD_SIZE;
- if (size >= image_size) {
- tmp.start = addr + i * PUD_SIZE;
- tmp.size = size;
+ /* Skip the good 1GB pages. */
+ gb_huge_pages = (pud_end - pud_start) >> PUD_SHIFT;
+ if (gb_huge_pages > max_gb_huge_pages) {
+ pud_end = pud_start + (max_gb_huge_pages << PUD_SHIFT);
+ max_gb_huge_pages = 0;
+ } else {
+ max_gb_huge_pages -= gb_huge_pages;
+ }
+
+ /* Check if the tail part of the region is usable. */
+ if (region->start + region->size >= pud_end + image_size) {
+ tmp.start = pud_end;
+ tmp.size = region->start + region->size - pud_end;
store_slot_info(&tmp, image_size);
}
}
-static unsigned long slots_fetch_random(void)
+static u64 slots_fetch_random(void)
{
unsigned long slot;
- int i;
+ unsigned int i;
/* Handle case of no slots stored. */
if (slot_max == 0)
@@ -598,7 +573,7 @@ static unsigned long slots_fetch_random(void)
slot -= slot_areas[i].num;
continue;
}
- return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
+ return slot_areas[i].addr + ((u64)slot * CONFIG_PHYSICAL_ALIGN);
}
if (i == slot_area_index)
@@ -611,49 +586,23 @@ static void __process_mem_region(struct mem_vector *entry,
unsigned long image_size)
{
struct mem_vector region, overlap;
- unsigned long start_orig, end;
- struct mem_vector cur_entry;
-
- /* On 32-bit, ignore entries entirely above our maximum. */
- if (IS_ENABLED(CONFIG_X86_32) && entry->start >= KERNEL_IMAGE_SIZE)
- return;
+ u64 region_end;
- /* Ignore entries entirely below our minimum. */
- if (entry->start + entry->size < minimum)
- return;
-
- /* Ignore entries above memory limit */
- end = min(entry->size + entry->start, mem_limit);
- if (entry->start >= end)
- return;
- cur_entry.start = entry->start;
- cur_entry.size = end - entry->start;
-
- region.start = cur_entry.start;
- region.size = cur_entry.size;
+ /* Enforce minimum and memory limit. */
+ region.start = max_t(u64, entry->start, minimum);
+ region_end = min(entry->start + entry->size, mem_limit);
/* Give up if slot area array is full. */
while (slot_area_index < MAX_SLOT_AREA) {
- start_orig = region.start;
-
- /* Potentially raise address to minimum location. */
- if (region.start < minimum)
- region.start = minimum;
-
/* Potentially raise address to meet alignment needs. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Did we raise the address above the passed in memory entry? */
- if (region.start > cur_entry.start + cur_entry.size)
+ if (region.start > region_end)
return;
/* Reduce size by any delta from the original address. */
- region.size -= region.start - start_orig;
-
- /* On 32-bit, reduce region size to fit within max size. */
- if (IS_ENABLED(CONFIG_X86_32) &&
- region.start + region.size > KERNEL_IMAGE_SIZE)
- region.size = KERNEL_IMAGE_SIZE - region.start;
+ region.size = region_end - region.start;
/* Return if region can't contain decompressed kernel */
if (region.size < image_size)
@@ -666,27 +615,19 @@ static void __process_mem_region(struct mem_vector *entry,
}
/* Store beginning of region if holds at least image_size. */
- if (overlap.start > region.start + image_size) {
- struct mem_vector beginning;
-
- beginning.start = region.start;
- beginning.size = overlap.start - region.start;
- process_gb_huge_pages(&beginning, image_size);
+ if (overlap.start >= region.start + image_size) {
+ region.size = overlap.start - region.start;
+ process_gb_huge_pages(&region, image_size);
}
- /* Return if overlap extends to or past end of region. */
- if (overlap.start + overlap.size >= region.start + region.size)
- return;
-
/* Clip off the overlapping region and start over. */
- region.size -= overlap.start - region.start + overlap.size;
region.start = overlap.start + overlap.size;
}
}
static bool process_mem_region(struct mem_vector *region,
- unsigned long long minimum,
- unsigned long long image_size)
+ unsigned long minimum,
+ unsigned long image_size)
{
int i;
/*
@@ -709,7 +650,7 @@ static bool process_mem_region(struct mem_vector *region,
* immovable memory and @region.
*/
for (i = 0; i < num_immovable_mem; i++) {
- unsigned long long start, end, entry_end, region_end;
+ u64 start, end, entry_end, region_end;
struct mem_vector entry;
if (!mem_overlaps(region, &immovable_mem[i]))
@@ -736,8 +677,8 @@ static bool process_mem_region(struct mem_vector *region,
#ifdef CONFIG_EFI
/*
- * Returns true if mirror region found (and must have been processed
- * for slots adding)
+ * Returns true if we processed the EFI memmap, which we prefer over the E820
+ * table if it is available.
*/
static bool
process_efi_entries(unsigned long minimum, unsigned long image_size)
@@ -839,20 +780,30 @@ static void process_e820_entries(unsigned long minimum,
static unsigned long find_random_phys_addr(unsigned long minimum,
unsigned long image_size)
{
+ u64 phys_addr;
+
+ /* Bail out early if it's impossible to succeed. */
+ if (minimum + image_size > mem_limit)
+ return 0;
+
/* Check if we had too many memmaps. */
if (memmap_too_large) {
debug_putstr("Aborted memory entries scan (more than 4 memmap= args)!\n");
return 0;
}
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+ if (!process_efi_entries(minimum, image_size))
+ process_e820_entries(minimum, image_size);
- if (process_efi_entries(minimum, image_size))
- return slots_fetch_random();
+ phys_addr = slots_fetch_random();
- process_e820_entries(minimum, image_size);
- return slots_fetch_random();
+ /* Perform a final check to make sure the address is in range. */
+ if (phys_addr < minimum || phys_addr + image_size > mem_limit) {
+ warn("Invalid physical address chosen!\n");
+ return 0;
+ }
+
+ return (unsigned long)phys_addr;
}
static unsigned long find_random_virt_addr(unsigned long minimum,
@@ -860,18 +811,12 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
{
unsigned long slots, random_addr;
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
- /* Align image_size for easy slot calculations. */
- image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
-
/*
* There are how many CONFIG_PHYSICAL_ALIGN-sized slots
* that can hold image_size within the range of minimum to
* KERNEL_IMAGE_SIZE?
*/
- slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
- CONFIG_PHYSICAL_ALIGN + 1;
+ slots = 1 + (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN;
random_addr = kaslr_get_random_long("Virtual") % slots;
@@ -895,18 +840,12 @@ void choose_random_location(unsigned long input,
return;
}
-#ifdef CONFIG_X86_5LEVEL
- if (__read_cr4() & X86_CR4_LA57) {
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
- }
-#endif
-
boot_params->hdr.loadflags |= KASLR_FLAG;
- /* Prepare to add new identity pagetables on demand. */
- initialize_identity_maps();
+ if (IS_ENABLED(CONFIG_X86_32))
+ mem_limit = KERNEL_IMAGE_SIZE;
+ else
+ mem_limit = MAXMEM;
/* Record the various known unsafe memory ranges. */
mem_avoid_init(input, input_size, *output);
@@ -917,6 +856,8 @@ void choose_random_location(unsigned long input,
* location:
*/
min_addr = min(*output, 512UL << 20);
+ /* Make sure minimum is aligned. */
+ min_addr = ALIGN(min_addr, CONFIG_PHYSICAL_ALIGN);
/* Walk available memory entries to find a random address. */
random_addr = find_random_phys_addr(min_addr, output_size);
@@ -924,19 +865,8 @@ void choose_random_location(unsigned long input,
warn("Physical KASLR disabled: no suitable memory region!");
} else {
/* Update the new physical address location. */
- if (*output != random_addr) {
- add_identity_map(random_addr, output_size);
+ if (*output != random_addr)
*output = random_addr;
- }
-
- /*
- * This loads the identity mapping page table.
- * This should only be done if a new physical address
- * is found for the kernel, otherwise we should keep
- * the old page table to make it be like the "nokaslr"
- * case.
- */
- finalize_identity_maps();
}
diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c
deleted file mode 100644
index f9c5c13d979b..000000000000
--- a/arch/x86/boot/compressed/kaslr_64.c
+++ /dev/null
@@ -1,153 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This code is used on x86_64 to create page table identity mappings on
- * demand by building up a new set of page tables (or appending to the
- * existing ones), and then switching over to them when ready.
- *
- * Copyright (C) 2015-2016 Yinghai Lu
- * Copyright (C) 2016 Kees Cook
- */
-
-/*
- * Since we're dealing with identity mappings, physical and virtual
- * addresses are the same, so override these defines which are ultimately
- * used by the headers in misc.h.
- */
-#define __pa(x) ((unsigned long)(x))
-#define __va(x) ((void *)((unsigned long)(x)))
-
-/* No PAGE_TABLE_ISOLATION support needed either: */
-#undef CONFIG_PAGE_TABLE_ISOLATION
-
-#include "misc.h"
-
-/* These actually do the work of building the kernel identity maps. */
-#include <linux/pgtable.h>
-#include <asm/init.h>
-/* Use the static base for this part of the boot process */
-#undef __PAGE_OFFSET
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE
-#include "../../mm/ident_map.c"
-
-/* Used to track our page table allocation area. */
-struct alloc_pgt_data {
- unsigned char *pgt_buf;
- unsigned long pgt_buf_size;
- unsigned long pgt_buf_offset;
-};
-
-/*
- * Allocates space for a page table entry, using struct alloc_pgt_data
- * above. Besides the local callers, this is used as the allocation
- * callback in mapping_info below.
- */
-static void *alloc_pgt_page(void *context)
-{
- struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
- unsigned char *entry;
-
- /* Validate there is space available for a new page. */
- if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
- debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
- debug_putaddr(pages->pgt_buf_offset);
- debug_putaddr(pages->pgt_buf_size);
- return NULL;
- }
-
- entry = pages->pgt_buf + pages->pgt_buf_offset;
- pages->pgt_buf_offset += PAGE_SIZE;
-
- return entry;
-}
-
-/* Used to track our allocated page tables. */
-static struct alloc_pgt_data pgt_data;
-
-/* The top level page table entry pointer. */
-static unsigned long top_level_pgt;
-
-phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
-
-/*
- * Mapping information structure passed to kernel_ident_mapping_init().
- * Due to relocation, pointers must be assigned at run time not build time.
- */
-static struct x86_mapping_info mapping_info;
-
-/* Locates and clears a region for a new top level page table. */
-void initialize_identity_maps(void)
-{
- /* If running as an SEV guest, the encryption mask is required. */
- set_sev_encryption_mask();
-
- /* Exclude the encryption mask from __PHYSICAL_MASK */
- physical_mask &= ~sme_me_mask;
-
- /* Init mapping_info with run-time function/buffer pointers. */
- mapping_info.alloc_pgt_page = alloc_pgt_page;
- mapping_info.context = &pgt_data;
- mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
- mapping_info.kernpg_flag = _KERNPG_TABLE;
-
- /*
- * It should be impossible for this not to already be true,
- * but since calling this a second time would rewind the other
- * counters, let's just make sure this is reset too.
- */
- pgt_data.pgt_buf_offset = 0;
-
- /*
- * If we came here via startup_32(), cr3 will be _pgtable already
- * and we must append to the existing area instead of entirely
- * overwriting it.
- *
- * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
- * the top-level page table is allocated separately.
- *
- * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
- * cases. On 4-level paging it's equal to 'top_level_pgt'.
- */
- top_level_pgt = read_cr3_pa();
- if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
- debug_putstr("booted via startup_32()\n");
- pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
- pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
- memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
- } else {
- debug_putstr("booted via startup_64()\n");
- pgt_data.pgt_buf = _pgtable;
- pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
- memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
- top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
- }
-}
-
-/*
- * Adds the specified range to what will become the new identity mappings.
- * Once all ranges have been added, the new mapping is activated by calling
- * finalize_identity_maps() below.
- */
-void add_identity_map(unsigned long start, unsigned long size)
-{
- unsigned long end = start + size;
-
- /* Align boundary to 2M. */
- start = round_down(start, PMD_SIZE);
- end = round_up(end, PMD_SIZE);
- if (start >= end)
- return;
-
- /* Build the mapping. */
- kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
- start, end);
-}
-
-/*
- * This switches the page tables to the new level4 that has been built
- * via calls to add_identity_map() above. If booted via startup_32(),
- * this is effectively a no-op.
- */
-void finalize_identity_maps(void)
-{
- write_cr3(top_level_pgt);
-}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index e478e40fbe5a..267e7f93050e 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -442,6 +442,13 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
parse_elf(output);
handle_relocations(output, output_len, virt_addr);
debug_putstr("done.\nBooting the kernel.\n");
+
+ /*
+ * Flush GHCB from cache and map it encrypted again when running as
+ * SEV-ES guest.
+ */
+ sev_es_shutdown_ghcb();
+
return output;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 726e264410ff..6d31f1b4c4d1 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,6 +23,7 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
+#include <asm/desc_defs.h>
#define BOOT_CTYPE_H
#include <linux/acpi.h>
@@ -36,6 +37,9 @@
#define memptr unsigned
#endif
+/* boot/compressed/vmlinux start and end markers */
+extern char _head[], _end[];
+
/* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
@@ -70,8 +74,8 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
struct mem_vector {
- unsigned long long start;
- unsigned long long size;
+ u64 start;
+ u64 size;
};
#if CONFIG_RANDOMIZE_BASE
@@ -81,8 +85,6 @@ void choose_random_location(unsigned long input,
unsigned long *output,
unsigned long output_size,
unsigned long *virt_addr);
-/* cpuflags.c */
-bool has_cpuflag(int flag);
#else
static inline void choose_random_location(unsigned long input,
unsigned long input_size,
@@ -93,18 +95,14 @@ static inline void choose_random_location(unsigned long input,
}
#endif
+/* cpuflags.c */
+bool has_cpuflag(int flag);
+
#ifdef CONFIG_X86_64
-void initialize_identity_maps(void);
-void add_identity_map(unsigned long start, unsigned long size);
-void finalize_identity_maps(void);
+extern int set_page_decrypted(unsigned long address);
+extern int set_page_encrypted(unsigned long address);
+extern int set_page_non_present(unsigned long address);
extern unsigned char _pgtable[];
-#else
-static inline void initialize_identity_maps(void)
-{ }
-static inline void add_identity_map(unsigned long start, unsigned long size)
-{ }
-static inline void finalize_identity_maps(void)
-{ }
#endif
#ifdef CONFIG_EARLY_PRINTK
@@ -119,6 +117,17 @@ static inline void console_init(void)
void set_sev_encryption_mask(void);
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+void sev_es_shutdown_ghcb(void);
+extern bool sev_es_check_ghcb_fault(unsigned long address);
+#else
+static inline void sev_es_shutdown_ghcb(void) { }
+static inline bool sev_es_check_ghcb_fault(unsigned long address)
+{
+ return false;
+}
+#endif
+
/* acpi.c */
#ifdef CONFIG_ACPI
acpi_physical_address get_rsdp_addr(void);
@@ -133,4 +142,21 @@ int count_immovable_mem_regions(void);
static inline int count_immovable_mem_regions(void) { return 0; }
#endif
+/* ident_map_64.c */
+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
+#endif
+
+/* Used by PAGE_KERN* macros: */
+extern pteval_t __default_kernel_pte_mask;
+
+/* idt_64.c */
+extern gate_desc boot_idt[BOOT_IDT_ENTRIES];
+extern struct desc_ptr boot_idt_desc;
+
+/* IDT Entry Points */
+void boot_page_fault(void);
+void boot_stage1_vc(void);
+void boot_stage2_vc(void);
+
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 7e01248765b2..52aa56cdbacc 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -60,6 +60,12 @@ int main(int argc, char *argv[])
printf(".incbin \"%s\"\n", argv[1]);
printf("input_data_end:\n");
+ printf(".section \".rodata\",\"a\",@progbits\n");
+ printf(".globl input_len\n");
+ printf("input_len:\n\t.long %lu\n", ilen);
+ printf(".globl output_len\n");
+ printf("output_len:\n\t.long %lu\n", (unsigned long)olen);
+
retval = 0;
bail:
if (f)
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index c8862696a47b..2a78746f5a4c 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -5,18 +5,16 @@
#include "pgtable.h"
#include "../string.h"
-/*
- * __force_order is used by special_insns.h asm code to force instruction
- * serialization.
- *
- * It is not referenced from the code, but GCC < 5 with -fPIE would fail
- * due to an undefined symbol. Define it to make these ancient GCCs work.
- */
-unsigned long __force_order;
-
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
+#ifdef CONFIG_X86_5LEVEL
+/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
+unsigned int __section(".data") __pgtable_l5_enabled;
+unsigned int __section(".data") pgdir_shift = 39;
+unsigned int __section(".data") ptrs_per_p4d = 1;
+#endif
+
struct paging_config {
unsigned long trampoline_start;
unsigned long l5_required;
@@ -32,7 +30,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
* Avoid putting the pointer into .bss as it will be cleared between
* paging_prepare() and extract_kernel().
*/
-unsigned long *trampoline_32bit __section(.data);
+unsigned long *trampoline_32bit __section(".data");
extern struct boot_params *boot_params;
int cmdline_find_option_bool(const char *option);
@@ -207,4 +205,13 @@ void cleanup_trampoline(void *pgtable)
/* Restore trampoline memory */
memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
+
+ /* Initialize variables for 5-level paging */
+#ifdef CONFIG_X86_5LEVEL
+ if (__read_cr4() & X86_CR4_LA57) {
+ __pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ }
+#endif
}
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
new file mode 100644
index 000000000000..954cb2702e23
--- /dev/null
+++ b/arch/x86/boot/compressed/sev-es.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+/*
+ * misc.h needs to be first because it knows how to include the other kernel
+ * headers in the pre-decompression code in a way that does not break
+ * compilation.
+ */
+#include "misc.h"
+
+#include <asm/pgtable_types.h>
+#include <asm/sev-es.h>
+#include <asm/trapnr.h>
+#include <asm/trap_pf.h>
+#include <asm/msr-index.h>
+#include <asm/fpu/xcr.h>
+#include <asm/ptrace.h>
+#include <asm/svm.h>
+
+#include "error.h"
+
+struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
+struct ghcb *boot_ghcb;
+
+/*
+ * Copy a version of this function here - insn-eval.c can't be used in
+ * pre-decompression code.
+ */
+static bool insn_has_rep_prefix(struct insn *insn)
+{
+ int i;
+
+ insn_get_prefixes(insn);
+
+ for (i = 0; i < insn->prefixes.nbytes; i++) {
+ insn_byte_t p = insn->prefixes.bytes[i];
+
+ if (p == 0xf2 || p == 0xf3)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
+ * doesn't use segments.
+ */
+static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
+{
+ return 0UL;
+}
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+ unsigned long low, high;
+
+ asm volatile("rdmsr" : "=a" (low), "=d" (high) :
+ "c" (MSR_AMD64_SEV_ES_GHCB));
+
+ return ((high << 32) | low);
+}
+
+static inline void sev_es_wr_ghcb_msr(u64 val)
+{
+ u32 low, high;
+
+ low = val & 0xffffffffUL;
+ high = val >> 32;
+
+ asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
+ "a"(low), "d" (high) : "memory");
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ enum es_result ret;
+
+ memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+
+ insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
+ insn_get_length(&ctxt->insn);
+
+ ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+
+ return ret;
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ void *dst, char *buf, size_t size)
+{
+ memcpy(dst, buf, size);
+
+ return ES_OK;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ void *src, char *buf, size_t size)
+{
+ memcpy(buf, src, size);
+
+ return ES_OK;
+}
+
+#undef __init
+#undef __pa
+#define __init
+#define __pa(x) ((unsigned long)(x))
+
+#define __BOOT_COMPRESSED
+
+/* Basic instruction decoding support needed */
+#include "../../lib/inat.c"
+#include "../../lib/insn.c"
+
+/* Include code for early handlers */
+#include "../../kernel/sev-es-shared.c"
+
+static bool early_setup_sev_es(void)
+{
+ if (!sev_es_negotiate_protocol())
+ sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED);
+
+ if (set_page_decrypted((unsigned long)&boot_ghcb_page))
+ return false;
+
+ /* Page is now mapped decrypted, clear it */
+ memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
+
+ boot_ghcb = &boot_ghcb_page;
+
+ /* Initialize lookup tables for the instruction decoder */
+ inat_init_tables();
+
+ return true;
+}
+
+void sev_es_shutdown_ghcb(void)
+{
+ if (!boot_ghcb)
+ return;
+
+ if (!sev_es_check_cpu_features())
+ error("SEV-ES CPU Features missing.");
+
+ /*
+ * GHCB Page must be flushed from the cache and mapped encrypted again.
+ * Otherwise the running kernel will see strange cache effects when
+ * trying to use that page.
+ */
+ if (set_page_encrypted((unsigned long)&boot_ghcb_page))
+ error("Can't map GHCB page encrypted");
+
+ /*
+ * GHCB page is mapped encrypted again and flushed from the cache.
+ * Mark it non-present now to catch bugs when #VC exceptions trigger
+ * after this point.
+ */
+ if (set_page_non_present((unsigned long)&boot_ghcb_page))
+ error("Can't unmap GHCB page");
+}
+
+bool sev_es_check_ghcb_fault(unsigned long address)
+{
+ /* Check whether the fault was on the GHCB page */
+ return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
+}
+
+void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
+{
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+
+ if (!boot_ghcb && !early_setup_sev_es())
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+ vc_ghcb_invalidate(boot_ghcb);
+ result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+ if (result != ES_OK)
+ goto finish;
+
+ switch (exit_code) {
+ case SVM_EXIT_RDTSC:
+ case SVM_EXIT_RDTSCP:
+ result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
+ break;
+ case SVM_EXIT_IOIO:
+ result = vc_handle_ioio(boot_ghcb, &ctxt);
+ break;
+ case SVM_EXIT_CPUID:
+ result = vc_handle_cpuid(boot_ghcb, &ctxt);
+ break;
+ default:
+ result = ES_UNSUPPORTED;
+ break;
+ }
+
+finish:
+ if (result == ES_OK) {
+ vc_finish_insn(&ctxt);
+ } else if (result != ES_RETRY) {
+ /*
+ * For now, just halt the machine. That makes debugging easier,
+ * later we just call sev_es_terminate() here.
+ */
+ while (true)
+ asm volatile("hlt\n");
+ }
+}
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 8f1025d1f681..112b2375d021 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -42,12 +42,6 @@ SECTIONS
*(.rodata.*)
_erodata = . ;
}
- .got : {
- _got = .;
- KEEP(*(.got.plt))
- KEEP(*(.got))
- _egot = .;
- }
.data : {
_data = . ;
*(.data)
@@ -75,5 +69,49 @@ SECTIONS
. = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */
_end = .;
+ STABS_DEBUG
+ DWARF_DEBUG
+ ELF_DETAILS
+
DISCARDS
+ /DISCARD/ : {
+ *(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
+ *(.hash) *(.gnu.hash)
+ *(.note.*)
+ }
+
+ .got.plt (INFO) : {
+ *(.got.plt)
+ }
+ ASSERT(SIZEOF(.got.plt) == 0 ||
+#ifdef CONFIG_X86_64
+ SIZEOF(.got.plt) == 0x18,
+#else
+ SIZEOF(.got.plt) == 0xc,
+#endif
+ "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .got : {
+ *(.got)
+ }
+ ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!")
+
+ .plt : {
+ *(.plt) *(.plt.*)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+
+ .rel.dyn : {
+ *(.rel.*) *(.rel_*)
+ }
+ ASSERT(SIZEOF(.rel.dyn) == 0, "Unexpected run-time relocations (.rel) detected!")
+
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
}
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 24c95522f231..49546c247ae2 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -20,7 +20,7 @@ SECTIONS
.initdata : { *(.initdata) }
__end_init = .;
- .text : { *(.text) }
+ .text : { *(.text .text.*) }
.text32 : { *(.text32) }
. = ALIGN(16);
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index c8b8c1a8d1fc..a3725ad46c5a 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -416,8 +416,6 @@ int main(int argc, char ** argv)
/* Set the default root device */
put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
- printf("Setup is %d bytes (padded to %d bytes).\n", c, i);
-
/* Open and stat the kernel file */
fd = open(argv[2], O_RDONLY);
if (fd < 0)
@@ -425,7 +423,6 @@ int main(int argc, char ** argv)
if (fstat(fd, &sb))
die("Unable to stat `%s': %m", argv[2]);
sz = sb.st_size;
- printf("System is %d kB\n", (sz+1023)/1024);
kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
if (kernel == MAP_FAILED)
die("Unable to mmap '%s': %m", argv[2]);
@@ -488,7 +485,6 @@ int main(int argc, char ** argv)
}
/* Write the CRC */
- printf("CRC %x\n", crc);
put_unaligned_le32(crc, buf);
if (fwrite(buf, 1, 4, dest) != 4)
die("Writing CRC failed");
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 1fedabdb95ad..f7eb976b0a4b 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -25,7 +25,7 @@ int early_serial_base;
* error during initialization.
*/
-static void __attribute__((section(".inittext"))) serial_putchar(int ch)
+static void __section(".inittext") serial_putchar(int ch)
{
unsigned timeout = 0xffff;
@@ -35,7 +35,7 @@ static void __attribute__((section(".inittext"))) serial_putchar(int ch)
outb(ch, early_serial_base + TXR);
}
-static void __attribute__((section(".inittext"))) bios_putchar(int ch)
+static void __section(".inittext") bios_putchar(int ch)
{
struct biosregs ireg;
@@ -47,7 +47,7 @@ static void __attribute__((section(".inittext"))) bios_putchar(int ch)
intcall(0x10, &ireg, NULL);
}
-void __attribute__((section(".inittext"))) putchar(int ch)
+void __section(".inittext") putchar(int ch)
{
if (ch == '\n')
putchar('\r'); /* \n -> \r\n */
@@ -58,7 +58,7 @@ void __attribute__((section(".inittext"))) putchar(int ch)
serial_putchar(ch);
}
-void __attribute__((section(".inittext"))) puts(const char *str)
+void __section(".inittext") puts(const char *str)
{
while (*str)
putchar(*str++);
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index cbf7fed22441..04bde0bb2003 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -78,7 +78,7 @@ struct card_info {
u16 xmode_n; /* Size of unprobed mode range */
};
-#define __videocard struct card_info __attribute__((used,section(".videocards")))
+#define __videocard struct card_info __section(".videocards") __attribute__((used))
extern struct card_info video_cards[], video_cards_end[];
int mode_defined(u16 mode); /* video.c */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index d7577fece9eb..78210793d357 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -19,6 +19,7 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
+# CONFIG_64BIT is not set
CONFIG_SMP=y
CONFIG_X86_GENERIC=y
CONFIG_HPET_TIMER=y
@@ -186,7 +187,6 @@ CONFIG_DRM_I915=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
-CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index f85600143747..9936528e1939 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -181,7 +181,6 @@ CONFIG_DRM_I915=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
-CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index 6737bcea1fa1..c025a01cf708 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -11,6 +11,7 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index e67a59130025..7b3a1cf0984b 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -12,6 +12,7 @@
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index d2d069bd459b..feccb5254c7e 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -28,9 +28,9 @@
#define SCALE_F sizeof(unsigned long)
#ifdef CONFIG_X86_64
-#define REX_PRE "0x48, "
+#define CRC32_INST "crc32q %1, %q0"
#else
-#define REX_PRE
+#define CRC32_INST "crc32l %1, %0"
#endif
#ifdef CONFIG_X86_64
@@ -48,11 +48,8 @@ asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
- __asm__ __volatile__(
- ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
- :"=S"(crc)
- :"0"(crc), "c"(*data)
- );
+ asm("crc32b %1, %0"
+ : "+r" (crc) : "rm" (*data));
data++;
}
@@ -66,11 +63,8 @@ static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len
unsigned long *ptmp = (unsigned long *)p;
while (iquotient--) {
- __asm__ __volatile__(
- ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
- :"=S"(crc)
- :"0"(crc), "c"(*ptmp)
- );
+ asm(CRC32_INST
+ : "+r" (crc) : "rm" (*ptmp));
ptmp++;
}
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index 8acbb6584a37..5af8021b98ce 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -11,6 +11,7 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/scatterlist.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
@@ -45,11 +46,11 @@ static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
asm volatile(
/* Clear registers to propagate the carry bit */
- " xor %%r8, %%r8;"
- " xor %%r9, %%r9;"
- " xor %%r10, %%r10;"
- " xor %%r11, %%r11;"
- " xor %1, %1;"
+ " xor %%r8d, %%r8d;"
+ " xor %%r9d, %%r9d;"
+ " xor %%r10d, %%r10d;"
+ " xor %%r11d, %%r11d;"
+ " xor %k1, %k1;"
/* Begin addition chain */
" addq 0(%3), %0;"
@@ -93,7 +94,7 @@ static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
" cmovc %0, %%rax;"
/* Step 2: Add carry*38 to the original sum */
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" add %%rax, %%r8;"
" adcx %%rcx, %%r9;"
" movq %%r9, 8(%1);"
@@ -165,28 +166,28 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Compute src1[0] * src2 */
" movq 0(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
@@ -200,7 +201,7 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %3, %3;"
+ " xor %k3, %k3;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -246,28 +247,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Compute src1[0] * src2 */
" movq 0(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 0(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 8(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 16(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 24(%1), %%rdx;"
- " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
+ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
" mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
" mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
" mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
@@ -277,29 +278,29 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Compute src1[0] * src2 */
" movq 32(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " movq %%r8, 64(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;"
/* Compute src1[1] * src2 */
" movq 40(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[2] * src2 */
" movq 48(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
/* Compute src1[3] * src2 */
" movq 56(%1), %%rdx;"
- " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
+ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10d, %%r10d;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
+ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
" mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
" mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
" adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
@@ -312,7 +313,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %3, %3;"
+ " xor %k3, %k3;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -345,7 +346,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 96(%1), %%r8, %%r13;"
- " xor %3, %3;"
+ " xor %k3, %k3;"
" adoxq 64(%1), %%r8;"
" mulxq 104(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -516,7 +517,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute all partial products */
" movq 0(%1), %%rdx;" /* f[0] */
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
@@ -526,7 +527,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
- " xor %%r15, %%r15;"
+ " xor %%r15d, %%r15d;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
@@ -563,7 +564,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -607,7 +608,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
asm volatile(
/* Step 1: Compute all partial products */
" movq 0(%1), %%rdx;" /* f[0] */
- " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
" mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 24(%1), %%rdx;" /* f[3] */
@@ -617,7 +618,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
- " xor %%r15, %%r15;"
+ " xor %%r15d, %%r15d;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
@@ -647,7 +648,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute all partial products */
" movq 32(%1), %%rdx;" /* f[0] */
- " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
+ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15d, %%r15d;" /* f[1]*f[0] */
" mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
" mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
" movq 56(%1), %%rdx;" /* f[3] */
@@ -657,7 +658,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
" mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
/* Step 2: Compute two parallel carry chains */
- " xor %%r15, %%r15;"
+ " xor %%r15d, %%r15d;"
" adox %%rax, %%r10;"
" adcx %%r8, %%r8;"
" adox %%rcx, %%r11;"
@@ -692,7 +693,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 32(%1), %%r8, %%r13;"
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" adoxq 0(%1), %%r8;"
" mulxq 40(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
@@ -725,7 +726,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
/* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
" mov $38, %%rdx;"
" mulxq 96(%1), %%r8, %%r13;"
- " xor %%rcx, %%rcx;"
+ " xor %%ecx, %%ecx;"
" adoxq 64(%1), %%r8;"
" mulxq 104(%1), %%r9, %%rbx;"
" adcx %%r13, %%r9;"
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index 80fcb85736e1..8ea5ab0f1ca7 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -10,6 +10,7 @@
#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/simd.h>
asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index cc6b7c1a2705..2b353d42ed13 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -10,6 +10,7 @@
#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/simd.h>
asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 137edcf038cb..7d568012cc15 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
___
&declare_function("poly1305_init_x86_64", 32, 3);
$code.=<<___;
- xor %rax,%rax
+ xor %eax,%eax
mov %rax,0($ctx) # initialize hash value
mov %rax,8($ctx)
mov %rax,16($ctx)
@@ -2853,7 +2853,7 @@ $code.=<<___;
.type poly1305_init_base2_44,\@function,3
.align 32
poly1305_init_base2_44:
- xor %rax,%rax
+ xor %eax,%eax
mov %rax,0($ctx) # initialize hash value
mov %rax,8($ctx)
mov %rax,16($ctx)
@@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
mov \$16,$len
sub %r10,$len
xor %eax,%eax
- xor %r11,%r11
+ xor %r11d,%r11d
.Loop_dec_byte:
mov ($inp,$otp),%r11b
mov ($otp),%al
@@ -4085,7 +4085,7 @@ avx_handler:
.long 0xa548f3fc # cld; rep movsq
mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ xor %ecx,%ecx # arg1, UNW_FLAG_NHANDLER
mov 8(%rsi),%rdx # arg2, disp->ImageBase
mov 0(%rsi),%r8 # arg3, disp->ControlPc
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index dfe921efa9b2..c44aba290fbb 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -11,6 +11,7 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/sizes.h>
#include <asm/intel-family.h>
#include <asm/simd.h>
@@ -157,8 +158,6 @@ static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
dctx->s[1] = get_unaligned_le32(&inp[4]);
dctx->s[2] = get_unaligned_le32(&inp[8]);
dctx->s[3] = get_unaligned_le32(&inp[12]);
- inp += POLY1305_BLOCK_SIZE;
- len -= POLY1305_BLOCK_SIZE;
acc += POLY1305_BLOCK_SIZE;
dctx->sset = true;
}
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index ae9b0d4615b3..07a9331d55e7 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -6,7 +6,6 @@
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
-#include <asm/inst.h>
/*
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 2f84c7ca74ea..870efeec8bda 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -299,7 +299,7 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
old_regs = set_irq_regs(regs);
instrumentation_begin();
- run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs);
+ run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
instrumentation_begin();
set_irq_regs(old_regs);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 70dea9337816..cad08703c4ad 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -46,13 +46,13 @@
.code64
.section .entry.text, "ax"
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
SYM_CODE_START(native_usergs_sysret64)
UNWIND_HINT_EMPTY
swapgs
sysretq
SYM_CODE_END(native_usergs_sysret64)
-#endif /* CONFIG_PARAVIRT */
+#endif /* CONFIG_PARAVIRT_XXL */
/*
* 64-bit SYSCALL instruction entry. Up to 6 arguments in registers.
@@ -101,6 +101,8 @@ SYM_CODE_START(entry_SYSCALL_64)
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
+
/* Construct struct pt_regs on stack */
pushq $__USER_DS /* pt_regs->ss */
pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) /* pt_regs->sp */
@@ -446,6 +448,84 @@ _ASM_NOKPROBE(\asmsym)
SYM_CODE_END(\asmsym)
.endm
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/**
+ * idtentry_vc - Macro to generate entry stub for #VC
+ * @vector: Vector number
+ * @asmsym: ASM symbol for the entry point
+ * @cfunc: C function to be called
+ *
+ * The macro emits code to set up the kernel context for #VC. The #VC handler
+ * runs on an IST stack and needs to be able to cause nested #VC exceptions.
+ *
+ * To make this work the #VC entry code tries its best to pretend it doesn't use
+ * an IST stack by switching to the task stack if coming from user-space (which
+ * includes early SYSCALL entry path) or back to the stack in the IRET frame if
+ * entered from kernel-mode.
+ *
+ * If entered from kernel-mode the return stack is validated first, and if it is
+ * not safe to use (e.g. because it points to the entry stack) the #VC handler
+ * will switch to a fall-back stack (VC2) and call a special handler function.
+ *
+ * The macro is only used for one vector, but it is planned to be extended in
+ * the future for the #HV exception.
+ */
+.macro idtentry_vc vector asmsym cfunc
+SYM_CODE_START(\asmsym)
+ UNWIND_HINT_IRET_REGS
+ ASM_CLAC
+
+ /*
+ * If the entry is from userspace, switch stacks and treat it as
+ * a normal entry.
+ */
+ testb $3, CS-ORIG_RAX(%rsp)
+ jnz .Lfrom_usermode_switch_stack_\@
+
+ /*
+ * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
+ * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
+ */
+ call paranoid_entry
+
+ UNWIND_HINT_REGS
+
+ /*
+ * Switch off the IST stack to make it free for nested exceptions. The
+ * vc_switch_off_ist() function will switch back to the interrupted
+ * stack if it is safe to do so. If not it switches to the VC fall-back
+ * stack.
+ */
+ movq %rsp, %rdi /* pt_regs pointer */
+ call vc_switch_off_ist
+ movq %rax, %rsp /* Switch to new stack */
+
+ UNWIND_HINT_REGS
+
+ /* Update pt_regs */
+ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+
+ movq %rsp, %rdi /* pt_regs pointer */
+
+ call \cfunc
+
+ /*
+ * No need to switch back to the IST stack. The current stack is either
+ * identical to the stack in the IRET frame or the VC fall-back stack,
+ * so it is definitly mapped even with PTI enabled.
+ */
+ jmp paranoid_exit
+
+ /* Switch to the regular task stack */
+.Lfrom_usermode_switch_stack_\@:
+ idtentry_body safe_stack_\cfunc, has_error_code=1
+
+_ASM_NOKPROBE(\asmsym)
+SYM_CODE_END(\asmsym)
+.endm
+#endif
+
/*
* Double fault entry. Straight paranoid. No checks from which context
* this comes because for the espfix induced #DF this would do the wrong
@@ -682,6 +762,8 @@ SYM_CODE_END(.Lbad_gs)
* rdx: Function argument (can be NULL if none)
*/
SYM_FUNC_START(asm_call_on_stack)
+SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL)
+SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL)
/*
* Save the frame pointer unconditionally. This allows the ORC
* unwinder to handle the stack switch.
@@ -840,8 +922,9 @@ SYM_CODE_START_LOCAL(paranoid_entry)
* retrieve and set the current CPUs kernel GSBASE. The stored value
* has to be restored in paranoid_exit unconditionally.
*
- * The MSR write ensures that no subsequent load is based on a
- * mispredicted GSBASE. No extra FENCE required.
+ * The unconditional write to GS base below ensures that no subsequent
+ * loads based on a mispredicted GS base can happen, therefore no LFENCE
+ * is needed here.
*/
SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
ret
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 1583831f61a9..f2fe0a33bcfd 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -12,8 +12,13 @@
* Reuse the 64-bit entry points for the x32 versions that occupy different
* slots in the syscall table.
*/
+#define __x32_sys_readv __x64_sys_readv
+#define __x32_sys_writev __x64_sys_writev
#define __x32_sys_getsockopt __x64_sys_getsockopt
#define __x32_sys_setsockopt __x64_sys_setsockopt
+#define __x32_sys_vmsplice __x64_sys_vmsplice
+#define __x32_sys_process_vm_readv __x64_sys_process_vm_readv
+#define __x32_sys_process_vm_writev __x64_sys_process_vm_writev
#define __SYSCALL_64(nr, sym)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9d1102873666..0d0667a9fbd7 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -32,7 +32,7 @@
18 i386 oldstat sys_stat
19 i386 lseek sys_lseek compat_sys_lseek
20 i386 getpid sys_getpid
-21 i386 mount sys_mount compat_sys_mount
+21 i386 mount sys_mount
22 i386 umount sys_oldumount
23 i386 setuid sys_setuid16
24 i386 getuid sys_getuid16
@@ -142,7 +142,7 @@
128 i386 init_module sys_init_module
129 i386 delete_module sys_delete_module
130 i386 get_kernel_syms
-131 i386 quotactl sys_quotactl compat_sys_quotactl32
+131 i386 quotactl sys_quotactl
132 i386 getpgid sys_getpgid
133 i386 fchdir sys_fchdir
134 i386 bdflush sys_bdflush
@@ -156,8 +156,8 @@
142 i386 _newselect sys_select compat_sys_select
143 i386 flock sys_flock
144 i386 msync sys_msync
-145 i386 readv sys_readv compat_sys_readv
-146 i386 writev sys_writev compat_sys_writev
+145 i386 readv sys_readv
+146 i386 writev sys_writev
147 i386 getsid sys_getsid
148 i386 fdatasync sys_fdatasync
149 i386 _sysctl sys_ni_syscall
@@ -327,7 +327,7 @@
313 i386 splice sys_splice
314 i386 sync_file_range sys_ia32_sync_file_range
315 i386 tee sys_tee
-316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
+316 i386 vmsplice sys_vmsplice
317 i386 move_pages sys_move_pages compat_sys_move_pages
318 i386 getcpu sys_getcpu
319 i386 epoll_pwait sys_epoll_pwait
@@ -358,8 +358,8 @@
344 i386 syncfs sys_syncfs
345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
346 i386 setns sys_setns
-347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+347 i386 process_vm_readv sys_process_vm_readv
+348 i386 process_vm_writev sys_process_vm_writev
349 i386 kcmp sys_kcmp
350 i386 finit_module sys_finit_module
351 i386 sched_setattr sys_sched_setattr
@@ -444,3 +444,4 @@
437 i386 openat2 sys_openat2
438 i386 pidfd_getfd sys_pidfd_getfd
439 i386 faccessat2 sys_faccessat2
+440 i386 process_madvise sys_process_madvise
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a688..379819244b91 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -361,18 +361,19 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
#
-# x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation. The __x32_compat_sys stubs are created
-# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
-# is defined.
+# Due to a historical design error, certain syscalls are numbered differently
+# in x32 as compared to native x86_64. These syscalls have numbers 512-547.
+# Do not add new syscalls to this range. Numbers 548 and above are available
+# for non-x32 use.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
+515 x32 readv sys_readv
+516 x32 writev sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
@@ -388,15 +389,15 @@
529 x32 waitid compat_sys_waitid
530 x32 set_robust_list compat_sys_set_robust_list
531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
+532 x32 vmsplice sys_vmsplice
533 x32 move_pages compat_sys_move_pages
534 x32 preadv compat_sys_preadv64
535 x32 pwritev compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
537 x32 recvmmsg compat_sys_recvmmsg_time64
538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
+539 x32 process_vm_readv sys_process_vm_readv
+540 x32 process_vm_writev sys_process_vm_writev
541 x32 setsockopt sys_setsockopt
542 x32 getsockopt sys_getsockopt
543 x32 io_setup compat_sys_io_setup
@@ -404,3 +405,5 @@
545 x32 execveat compat_sys_execveat
546 x32 preadv2 compat_sys_preadv64v2
547 x32 pwritev2 compat_sys_pwritev64v2
+# This is the end of the legacy x32 range. Numbers 548 and above are
+# not special and are not to be used for x32-specific syscalls.
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 215376d975a2..21243747965d 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -9,8 +9,6 @@ ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE|
ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
include $(srctree)/lib/vdso/Makefile
-KBUILD_CFLAGS += $(DISABLE_LTO)
-
# Sanitizer runtimes are unavailable and cannot be linked here.
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
@@ -176,7 +174,7 @@ quiet_cmd_vdso = VDSO $@
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -shared --hash-style=both --build-id \
+VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
$(call ld-option, --eh-frame-hdr) -Bsymbolic
GCOV_PROFILE := n
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 84a4a73f77f7..283ed9d00426 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -14,6 +14,7 @@
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
#undef CONFIG_NR_CPUS
+#undef CONFIG_PARAVIRT_XXL
#define CONFIG_X86_32 1
#define CONFIG_PGTABLE_LEVELS 2
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 26c36357c4c9..40669eac9d6d 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -89,6 +89,7 @@ struct perf_ibs {
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
+ unsigned int fetch_count_reset_broken : 1;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
@@ -334,11 +335,18 @@ static u64 get_ibs_op_count(u64 config)
{
u64 count = 0;
- if (config & IBS_OP_VAL)
- count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
-
- if (ibs_caps & IBS_CAPS_RDWROPCNT)
- count += (config & IBS_OP_CUR_CNT) >> 32;
+ /*
+ * If the internal 27-bit counter rolled over, the count is MaxCnt
+ * and the lower 7 bits of CurCnt are randomized.
+ * Otherwise CurCnt has the full 27-bit current counter value.
+ */
+ if (config & IBS_OP_VAL) {
+ count = (config & IBS_OP_MAX_CNT) << 4;
+ if (ibs_caps & IBS_CAPS_OPCNTEXT)
+ count += config & IBS_OP_MAX_CNT_EXT_MASK;
+ } else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
+ count = (config & IBS_OP_CUR_CNT) >> 32;
+ }
return count;
}
@@ -363,7 +371,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+ u64 tmp = hwc->config | config;
+
+ if (perf_ibs->fetch_count_reset_broken)
+ wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
+
+ wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}
/*
@@ -394,7 +407,7 @@ static void perf_ibs_start(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
- u64 period;
+ u64 period, config = 0;
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
return;
@@ -403,13 +416,19 @@ static void perf_ibs_start(struct perf_event *event, int flags)
hwc->state = 0;
perf_ibs_set_period(perf_ibs, hwc, &period);
+ if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
+ config |= period & IBS_OP_MAX_CNT_EXT_MASK;
+ period &= ~IBS_OP_MAX_CNT_EXT_MASK;
+ }
+ config |= period >> 4;
+
/*
* Set STARTED before enabling the hardware, such that a subsequent NMI
* must observe it.
*/
set_bit(IBS_STARTED, pcpu->state);
clear_bit(IBS_STOPPING, pcpu->state);
- perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+ perf_ibs_enable_event(perf_ibs, hwc, config);
perf_event_update_userpage(event);
}
@@ -577,7 +596,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
struct perf_ibs_data ibs_data;
int offset, size, check_rip, offset_max, throttle = 0;
unsigned int msr;
- u64 *buf, *config, period;
+ u64 *buf, *config, period, new_config = 0;
if (!test_bit(IBS_STARTED, pcpu->state)) {
fail:
@@ -626,18 +645,24 @@ fail:
perf_ibs->offset_max,
offset + 1);
} while (offset < offset_max);
+ /*
+ * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
+ * depending on their availability.
+ * Can't add to offset_max as they are staggered
+ */
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- /*
- * Read IbsBrTarget and IbsOpData4 separately
- * depending on their availability.
- * Can't add to offset_max as they are staggered
- */
- if (ibs_caps & IBS_CAPS_BRNTRGT) {
- rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
- size++;
+ if (perf_ibs == &perf_ibs_op) {
+ if (ibs_caps & IBS_CAPS_BRNTRGT) {
+ rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+ size++;
+ }
+ if (ibs_caps & IBS_CAPS_OPDATA4) {
+ rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+ size++;
+ }
}
- if (ibs_caps & IBS_CAPS_OPDATA4) {
- rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+ if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
+ rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
size++;
}
}
@@ -666,13 +691,17 @@ out:
if (throttle) {
perf_ibs_stop(event, 0);
} else {
- period >>= 4;
-
- if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
- (*config & IBS_OP_CNT_CTL))
- period |= *config & IBS_OP_CUR_CNT_RAND;
+ if (perf_ibs == &perf_ibs_op) {
+ if (ibs_caps & IBS_CAPS_OPCNTEXT) {
+ new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
+ period &= ~IBS_OP_MAX_CNT_EXT_MASK;
+ }
+ if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
+ new_config |= *config & IBS_OP_CUR_CNT_RAND;
+ }
+ new_config |= period >> 4;
- perf_ibs_enable_event(perf_ibs, hwc, period);
+ perf_ibs_enable_event(perf_ibs, hwc, new_config);
}
perf_event_update_userpage(event);
@@ -733,12 +762,26 @@ static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
+ /*
+ * Some chips fail to reset the fetch count when it is written; instead
+ * they need a 0-1 transition of IbsFetchEn.
+ */
+ if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+ perf_ibs_fetch.fetch_count_reset_broken = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
*attr++ = &format_attr_cnt_ctl.attr;
}
+
+ if (ibs_caps & IBS_CAPS_OPCNTEXT) {
+ perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ }
+
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
index fb616203ce42..be50ef8572cc 100644
--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -379,7 +379,7 @@ static __init int _init_events_attrs(void)
while (amd_iommu_v2_event_descs[i].attr.attr.name)
i++;
- attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL);
+ attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 76400c052b0e..7f014d450bc2 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -181,28 +181,28 @@ static void amd_uncore_del(struct perf_event *event, int flags)
}
/*
- * Convert logical CPU number to L3 PMC Config ThreadMask format
+ * Return a full thread and slice mask unless user
+ * has provided them
*/
-static u64 l3_thread_slice_mask(int cpu)
+static u64 l3_thread_slice_mask(u64 config)
{
- u64 thread_mask, core = topology_core_id(cpu);
- unsigned int shift, thread = 0;
+ if (boot_cpu_data.x86 <= 0x18)
+ return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
+ ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
- if (topology_smt_supported() && !topology_is_primary_thread(cpu))
- thread = 1;
-
- if (boot_cpu_data.x86 <= 0x18) {
- shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
- thread_mask = BIT_ULL(shift);
-
- return AMD64_L3_SLICE_MASK | thread_mask;
- }
-
- core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
- shift = AMD64_L3_THREAD_SHIFT + thread;
- thread_mask = BIT_ULL(shift);
+ /*
+ * If the user doesn't specify a threadmask, they're not trying to
+ * count core 0, so we enable all cores & threads.
+ * We'll also assume that they want to count slice 0 if they specify
+ * a threadmask and leave sliceid and enallslices unpopulated.
+ */
+ if (!(config & AMD64_L3_F19H_THREAD_MASK))
+ return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
+ AMD64_L3_EN_ALL_CORES;
- return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
+ return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
+ AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
+ AMD64_L3_COREID_MASK);
}
static int amd_uncore_event_init(struct perf_event *event)
@@ -232,7 +232,7 @@ static int amd_uncore_event_init(struct perf_event *event)
* For other events, the two fields do not affect the count.
*/
if (l3_mask && is_llc_event(event))
- hwc->config |= l3_thread_slice_mask(event->cpu);
+ hwc->config |= l3_thread_slice_mask(event->attr.config);
uncore = event_to_amd_uncore(event);
if (!uncore)
@@ -274,47 +274,72 @@ static struct attribute_group amd_uncore_attr_group = {
.attrs = amd_uncore_attrs,
};
-/*
- * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
- * on family
- */
-#define AMD_FORMAT_ATTR(_dev, _name, _format) \
-static ssize_t \
-_dev##_show##_name(struct device *dev, \
- struct device_attribute *attr, \
- char *page) \
-{ \
- BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
- return sprintf(page, _format "\n"); \
-} \
-static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
-
-/* Used for each uncore counter type */
-#define AMD_ATTRIBUTE(_name) \
-static struct attribute *amd_uncore_format_attr_##_name[] = { \
- &format_attr_event_##_name.attr, \
- &format_attr_umask.attr, \
- NULL, \
-}; \
-static struct attribute_group amd_uncore_format_group_##_name = { \
- .name = "format", \
- .attrs = amd_uncore_format_attr_##_name, \
-}; \
-static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \
- &amd_uncore_attr_group, \
- &amd_uncore_format_group_##_name, \
- NULL, \
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
+DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
+DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
+DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
+
+static struct attribute *amd_uncore_df_format_attr[] = {
+ &format_attr_event12.attr, /* event14 if F17h+ */
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static struct attribute *amd_uncore_l3_format_attr[] = {
+ &format_attr_event12.attr, /* event8 if F17h+ */
+ &format_attr_umask.attr,
+ NULL, /* slicemask if F17h, coreid if F19h */
+ NULL, /* threadmask8 if F17h, enallslices if F19h */
+ NULL, /* enallcores if F19h */
+ NULL, /* sliceid if F19h */
+ NULL, /* threadmask2 if F19h */
+ NULL,
+};
+
+static struct attribute_group amd_uncore_df_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_df_format_attr,
};
-AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
-AMD_FORMAT_ATTR(umask, , "config:8-15");
-AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
-AMD_FORMAT_ATTR(event, _l3, "config:0-7");
-AMD_ATTRIBUTE(df);
-AMD_ATTRIBUTE(l3);
+static struct attribute_group amd_uncore_l3_format_group = {
+ .name = "format",
+ .attrs = amd_uncore_l3_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_df_attr_groups[] = {
+ &amd_uncore_attr_group,
+ &amd_uncore_df_format_group,
+ NULL,
+};
+
+static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
+ &amd_uncore_attr_group,
+ &amd_uncore_l3_format_group,
+ NULL,
+};
static struct pmu amd_nb_pmu = {
.task_ctx_nr = perf_invalid_context,
+ .attr_groups = amd_uncore_df_attr_groups,
+ .name = "amd_nb",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
@@ -326,6 +351,8 @@ static struct pmu amd_nb_pmu = {
static struct pmu amd_llc_pmu = {
.task_ctx_nr = perf_invalid_context,
+ .attr_groups = amd_uncore_l3_attr_groups,
+ .name = "amd_l2",
.event_init = amd_uncore_event_init,
.add = amd_uncore_add,
.del = amd_uncore_del,
@@ -529,6 +556,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
static int __init amd_uncore_init(void)
{
+ struct attribute **df_attr = amd_uncore_df_format_attr;
+ struct attribute **l3_attr = amd_uncore_l3_format_attr;
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
@@ -538,6 +567,8 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;
+ num_counters_nb = NUM_COUNTERS_NB;
+ num_counters_llc = NUM_COUNTERS_L2;
if (boot_cpu_data.x86 >= 0x17) {
/*
* For F17h and above, the Northbridge counters are
@@ -545,27 +576,16 @@ static int __init amd_uncore_init(void)
* counters are supported too. The PMUs are exported
* based on family as either L2 or L3 and NB or DF.
*/
- num_counters_nb = NUM_COUNTERS_NB;
num_counters_llc = NUM_COUNTERS_L3;
amd_nb_pmu.name = "amd_df";
amd_llc_pmu.name = "amd_l3";
- format_attr_event_df.show = &event_show_df;
- format_attr_event_l3.show = &event_show_l3;
l3_mask = true;
- } else {
- num_counters_nb = NUM_COUNTERS_NB;
- num_counters_llc = NUM_COUNTERS_L2;
- amd_nb_pmu.name = "amd_nb";
- amd_llc_pmu.name = "amd_l2";
- format_attr_event_df = format_attr_event;
- format_attr_event_l3 = format_attr_event;
- l3_mask = false;
}
- amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
- amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
-
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+ if (boot_cpu_data.x86 >= 0x17)
+ *df_attr = &format_attr_event14.attr;
+
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_nb) {
ret = -ENOMEM;
@@ -575,13 +595,29 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_nb;
- pr_info("%s NB counters detected\n",
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
- "HYGON" : "AMD");
+ pr_info("%d %s %s counters detected\n", num_counters_nb,
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+ amd_nb_pmu.name);
+
ret = 0;
}
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
+ if (boot_cpu_data.x86 >= 0x19) {
+ *l3_attr++ = &format_attr_event8.attr;
+ *l3_attr++ = &format_attr_umask.attr;
+ *l3_attr++ = &format_attr_coreid.attr;
+ *l3_attr++ = &format_attr_enallslices.attr;
+ *l3_attr++ = &format_attr_enallcores.attr;
+ *l3_attr++ = &format_attr_sliceid.attr;
+ *l3_attr++ = &format_attr_threadmask2.attr;
+ } else if (boot_cpu_data.x86 >= 0x17) {
+ *l3_attr++ = &format_attr_event8.attr;
+ *l3_attr++ = &format_attr_umask.attr;
+ *l3_attr++ = &format_attr_slicemask.attr;
+ *l3_attr++ = &format_attr_threadmask8.attr;
+ }
+
amd_uncore_llc = alloc_percpu(struct amd_uncore *);
if (!amd_uncore_llc) {
ret = -ENOMEM;
@@ -591,9 +627,9 @@ static int __init amd_uncore_init(void)
if (ret)
goto fail_llc;
- pr_info("%s LLC counters detected\n",
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
- "HYGON" : "AMD");
+ pr_info("%d %s %s counters detected\n", num_counters_llc,
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
+ amd_llc_pmu.name);
ret = 0;
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 1cbf57dc2ac8..a88c94d65693 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -28,6 +28,7 @@
#include <linux/bitops.h>
#include <linux/device.h>
#include <linux/nospec.h>
+#include <linux/static_call.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -52,6 +53,34 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+/*
+ * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
+ * from just a typename, as opposed to an actual function.
+ */
+DEFINE_STATIC_CALL_NULL(x86_pmu_handle_irq, *x86_pmu.handle_irq);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable_all, *x86_pmu.disable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
+DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
+DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events, *x86_pmu.schedule_events);
+DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
+DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_start_scheduling, *x86_pmu.start_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
+DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task);
+DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
+
u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -76,6 +105,9 @@ u64 x86_perf_event_update(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
+ if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
+ return x86_pmu.update_topdown_event(event);
+
/*
* Careful: an NMI might modify the previous event value.
*
@@ -660,7 +692,7 @@ static void x86_pmu_disable(struct pmu *pmu)
cpuc->enabled = 0;
barrier();
- x86_pmu.disable_all();
+ static_call(x86_pmu_disable_all)();
}
void x86_pmu_enable_all(int added)
@@ -907,8 +939,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
n0 -= cpuc->n_txn;
- if (x86_pmu.start_scheduling)
- x86_pmu.start_scheduling(cpuc);
+ static_call_cond(x86_pmu_start_scheduling)(cpuc);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = cpuc->event_constraint[i];
@@ -925,7 +956,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* change due to external factors (sibling state, allow_tfa).
*/
if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
- c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+ c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
cpuc->event_constraint[i] = c;
}
@@ -1008,8 +1039,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (!unsched && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
- if (x86_pmu.commit_scheduling)
- x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+ static_call_cond(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
}
} else {
for (i = n0; i < n; i++) {
@@ -1018,19 +1048,56 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/*
* release events that failed scheduling
*/
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, e);
+ static_call_cond(x86_pmu_put_event_constraints)(cpuc, e);
cpuc->event_constraint[i] = NULL;
}
}
- if (x86_pmu.stop_scheduling)
- x86_pmu.stop_scheduling(cpuc);
+ static_call_cond(x86_pmu_stop_scheduling)(cpuc);
return unsched ? -EINVAL : 0;
}
+static int add_nr_metric_event(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (is_metric_event(event)) {
+ if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
+ return -EINVAL;
+ cpuc->n_metric++;
+ cpuc->n_txn_metric++;
+ }
+
+ return 0;
+}
+
+static void del_nr_metric_event(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (is_metric_event(event))
+ cpuc->n_metric--;
+}
+
+static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
+ int max_count, int n)
+{
+
+ if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+ return -EINVAL;
+
+ if (n >= max_count + cpuc->n_metric)
+ return -EINVAL;
+
+ cpuc->event_list[n] = event;
+ if (is_counter_pair(&event->hw)) {
+ cpuc->n_pair++;
+ cpuc->n_txn_pair++;
+ }
+
+ return 0;
+}
+
/*
* dogrp: true if must collect siblings events (group)
* returns total number of events and error code
@@ -1067,28 +1134,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
}
if (is_x86_event(leader)) {
- if (n >= max_count)
+ if (collect_event(cpuc, leader, max_count, n))
return -EINVAL;
- cpuc->event_list[n] = leader;
n++;
- if (is_counter_pair(&leader->hw))
- cpuc->n_pair++;
}
+
if (!dogrp)
return n;
for_each_sibling_event(event, leader) {
- if (!is_x86_event(event) ||
- event->state <= PERF_EVENT_STATE_OFF)
+ if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
continue;
- if (n >= max_count)
+ if (collect_event(cpuc, event, max_count, n))
return -EINVAL;
- cpuc->event_list[n] = event;
n++;
- if (is_counter_pair(&event->hw))
- cpuc->n_pair++;
}
return n;
}
@@ -1110,11 +1171,16 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
break;
+ case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
+ /* All the metric events are mapped onto the fixed counter 3. */
+ idx = INTEL_PMC_IDX_FIXED_SLOTS;
+ /* fall through */
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
(idx - INTEL_PMC_IDX_FIXED);
- hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+ hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
+ INTEL_PMC_FIXED_RDPMC_BASE;
break;
default:
@@ -1226,7 +1292,7 @@ static void x86_pmu_enable(struct pmu *pmu)
cpuc->enabled = 1;
barrier();
- x86_pmu.enable_all(added);
+ static_call(x86_pmu_enable_all)(added);
}
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1245,6 +1311,10 @@ int x86_perf_event_set_period(struct perf_event *event)
if (unlikely(!hwc->event_base))
return 0;
+ if (unlikely(is_topdown_count(event)) &&
+ x86_pmu.set_topdown_event_period)
+ return x86_pmu.set_topdown_event_period(event);
+
/*
* If we are way outside a reasonable range then just skip forward:
*/
@@ -1284,11 +1354,11 @@ int x86_perf_event_set_period(struct perf_event *event)
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
- * Clear the Merge event counter's upper 16 bits since
+ * Sign extend the Merge event counter's upper 16 bits since
* we currently declare a 48-bit counter width
*/
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_event_addr(idx + 1), 0);
+ wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
/*
* Due to erratum on certan cpu we need
@@ -1347,7 +1417,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto done_collect;
- ret = x86_pmu.schedule_events(cpuc, n, assign);
+ ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
if (ret)
goto out;
/*
@@ -1365,13 +1435,11 @@ done_collect:
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
- if (x86_pmu.add) {
- /*
- * This is before x86_pmu_enable() will call x86_pmu_start(),
- * so we enable LBRs before an event needs them etc..
- */
- x86_pmu.add(event);
- }
+ /*
+ * This is before x86_pmu_enable() will call x86_pmu_start(),
+ * so we enable LBRs before an event needs them etc..
+ */
+ static_call_cond(x86_pmu_add)(event);
ret = 0;
out:
@@ -1399,7 +1467,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
cpuc->events[idx] = event;
__set_bit(idx, cpuc->active_mask);
__set_bit(idx, cpuc->running);
- x86_pmu.enable(event);
+ static_call(x86_pmu_enable)(event);
perf_event_update_userpage(event);
}
@@ -1469,7 +1537,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
if (test_bit(hwc->idx, cpuc->active_mask)) {
- x86_pmu.disable(event);
+ static_call(x86_pmu_disable)(event);
__clear_bit(hwc->idx, cpuc->active_mask);
cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
@@ -1519,8 +1587,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
if (i >= cpuc->n_events - cpuc->n_added)
--cpuc->n_added;
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, event);
+ static_call_cond(x86_pmu_put_event_constraints)(cpuc, event);
/* Delete the array entry. */
while (++i < cpuc->n_events) {
@@ -1529,17 +1596,18 @@ static void x86_pmu_del(struct perf_event *event, int flags)
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
+ if (x86_pmu.intel_cap.perf_metrics)
+ del_nr_metric_event(cpuc, event);
perf_event_update_userpage(event);
do_del:
- if (x86_pmu.del) {
- /*
- * This is after x86_pmu_stop(); so we disable LBRs after any
- * event can need them etc..
- */
- x86_pmu.del(event);
- }
+
+ /*
+ * This is after x86_pmu_stop(); so we disable LBRs after any
+ * event can need them etc..
+ */
+ static_call_cond(x86_pmu_del)(event);
}
int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -1617,7 +1685,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
return NMI_DONE;
start_clock = sched_clock();
- ret = x86_pmu.handle_irq(regs);
+ ret = static_call(x86_pmu_handle_irq)(regs);
finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
@@ -1830,6 +1898,38 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
static struct attribute_group x86_pmu_attr_group;
static struct attribute_group x86_pmu_caps_group;
+static void x86_pmu_static_call_update(void)
+{
+ static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
+ static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
+ static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
+ static_call_update(x86_pmu_enable, x86_pmu.enable);
+ static_call_update(x86_pmu_disable, x86_pmu.disable);
+
+ static_call_update(x86_pmu_add, x86_pmu.add);
+ static_call_update(x86_pmu_del, x86_pmu.del);
+ static_call_update(x86_pmu_read, x86_pmu.read);
+
+ static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
+ static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
+ static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
+
+ static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
+ static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
+ static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
+
+ static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
+ static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
+
+ static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
+ static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
+}
+
+static void _x86_pmu_read(struct perf_event *event)
+{
+ x86_perf_event_update(event);
+}
+
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -1898,6 +1998,11 @@ static int __init init_hw_perf_events(void)
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
+ if (!x86_pmu.read)
+ x86_pmu.read = _x86_pmu_read;
+
+ x86_pmu_static_call_update();
+
/*
* Install callbacks. Core will call them for each online
* cpu.
@@ -1934,11 +2039,9 @@ out:
}
early_initcall(init_hw_perf_events);
-static inline void x86_pmu_read(struct perf_event *event)
+static void x86_pmu_read(struct perf_event *event)
{
- if (x86_pmu.read)
- return x86_pmu.read(event);
- x86_perf_event_update(event);
+ static_call(x86_pmu_read)(event);
}
/*
@@ -1962,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
perf_pmu_disable(pmu);
__this_cpu_write(cpu_hw_events.n_txn, 0);
+ __this_cpu_write(cpu_hw_events.n_txn_pair, 0);
+ __this_cpu_write(cpu_hw_events.n_txn_metric, 0);
}
/*
@@ -1987,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
*/
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+ __this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
+ __this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
perf_pmu_enable(pmu);
}
@@ -2015,7 +2122,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
if (!x86_pmu_initialized())
return -EAGAIN;
- ret = x86_pmu.schedule_events(cpuc, n, assign);
+ ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
if (ret)
return ret;
@@ -2208,17 +2315,15 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m
static int x86_pmu_event_idx(struct perf_event *event)
{
- int idx = event->hw.idx;
+ struct hw_perf_event *hwc = &event->hw;
- if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+ if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
- if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
- idx -= INTEL_PMC_IDX_FIXED;
- idx |= 1 << 30;
- }
-
- return idx + 1;
+ if (is_metric_idx(hwc->idx))
+ return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
+ else
+ return hwc->event_base_rdpmc + 1;
}
static ssize_t get_attr_rdpmc(struct device *cdev,
@@ -2308,15 +2413,13 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
- if (x86_pmu.sched_task)
- x86_pmu.sched_task(ctx, sched_in);
+ static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
}
static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
{
- if (x86_pmu.swap_task_ctx)
- x86_pmu.swap_task_ctx(prev, next);
+ static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
}
void perf_check_microcode(void)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 31e6887d24f1..f1926e9f2143 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -243,10 +243,14 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
static struct event_constraint intel_icl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
+ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
@@ -309,6 +313,12 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
"4", "2");
+EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
+EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
+EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
+
static struct attribute *snb_events_attrs[] = {
EVENT_PTR(td_slots_issued),
EVENT_PTR(td_slots_retired),
@@ -2165,11 +2175,24 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
static void intel_pmu_disable_fixed(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask;
+ int idx = hwc->idx;
- mask = 0xfULL << (idx * 4);
+ if (is_topdown_idx(idx)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * When there are other active TopDown events,
+ * don't disable the fixed counter 3.
+ */
+ if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
+ return;
+ idx = INTEL_PMC_IDX_FIXED_SLOTS;
+ }
+ intel_clear_masks(event, idx);
+
+ mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
wrmsrl(hwc->config_base, ctrl_val);
@@ -2180,17 +2203,28 @@ static void intel_pmu_disable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- if (idx < INTEL_PMC_IDX_FIXED) {
+ switch (idx) {
+ case 0 ... INTEL_PMC_IDX_FIXED - 1:
intel_clear_masks(event, idx);
x86_pmu_disable_event(event);
- } else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
- intel_clear_masks(event, idx);
+ break;
+ case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_disable_fixed(event);
- } else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
+ break;
+ case INTEL_PMC_IDX_FIXED_BTS:
intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer();
- } else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+ return;
+ case INTEL_PMC_IDX_FIXED_VLBR:
intel_clear_masks(event, idx);
+ break;
+ default:
+ intel_clear_masks(event, idx);
+ pr_warn("Failed to disable the event with invalid index %d\n",
+ idx);
+ return;
+ }
/*
* Needs to be called after x86_pmu_disable_event,
@@ -2208,10 +2242,189 @@ static void intel_pmu_del_event(struct perf_event *event)
intel_pmu_pebs_del(event);
}
+static int icl_set_topdown_event_period(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = local64_read(&hwc->period_left);
+
+ /*
+ * The values in PERF_METRICS MSR are derived from fixed counter 3.
+ * Software should start both registers, PERF_METRICS and fixed
+ * counter 3, from zero.
+ * Clear PERF_METRICS and Fixed counter 3 in initialization.
+ * After that, both MSRs will be cleared for each read.
+ * Don't need to clear them again.
+ */
+ if (left == x86_pmu.max_period) {
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrl(MSR_PERF_METRICS, 0);
+ hwc->saved_slots = 0;
+ hwc->saved_metric = 0;
+ }
+
+ if ((hwc->saved_slots) && is_slots_event(event)) {
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+ wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
+ }
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
+{
+ u32 val;
+
+ /*
+ * The metric is reported as an 8bit integer fraction
+ * suming up to 0xff.
+ * slots-in-metric = (Metric / 0xff) * slots
+ */
+ val = (metric >> ((idx - INTEL_PMC_IDX_METRIC_BASE) * 8)) & 0xff;
+ return mul_u64_u32_div(slots, val, 0xff);
+}
+
+static u64 icl_get_topdown_value(struct perf_event *event,
+ u64 slots, u64 metrics)
+{
+ int idx = event->hw.idx;
+ u64 delta;
+
+ if (is_metric_idx(idx))
+ delta = icl_get_metrics_event_value(metrics, slots, idx);
+ else
+ delta = slots;
+
+ return delta;
+}
+
+static void __icl_update_topdown_event(struct perf_event *event,
+ u64 slots, u64 metrics,
+ u64 last_slots, u64 last_metrics)
+{
+ u64 delta, last = 0;
+
+ delta = icl_get_topdown_value(event, slots, metrics);
+ if (last_slots)
+ last = icl_get_topdown_value(event, last_slots, last_metrics);
+
+ /*
+ * The 8bit integer fraction of metric may be not accurate,
+ * especially when the changes is very small.
+ * For example, if only a few bad_spec happens, the fraction
+ * may be reduced from 1 to 0. If so, the bad_spec event value
+ * will be 0 which is definitely less than the last value.
+ * Avoid update event->count for this case.
+ */
+ if (delta > last) {
+ delta -= last;
+ local64_add(delta, &event->count);
+ }
+}
+
+static void update_saved_topdown_regs(struct perf_event *event,
+ u64 slots, u64 metrics)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ int idx;
+
+ event->hw.saved_slots = slots;
+ event->hw.saved_metric = metrics;
+
+ for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ other->hw.saved_slots = slots;
+ other->hw.saved_metric = metrics;
+ }
+}
+
+/*
+ * Update all active Topdown events.
+ *
+ * The PERF_METRICS and Fixed counter 3 are read separately. The values may be
+ * modify by a NMI. PMU has to be disabled before calling this function.
+ */
+static u64 icl_update_topdown_event(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ u64 slots, metrics;
+ bool reset = true;
+ int idx;
+
+ /* read Fixed counter 3 */
+ rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+ if (!slots)
+ return 0;
+
+ /* read PERF_METRICS */
+ rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+
+ for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ __icl_update_topdown_event(other, slots, metrics,
+ event ? event->hw.saved_slots : 0,
+ event ? event->hw.saved_metric : 0);
+ }
+
+ /*
+ * Check and update this event, which may have been cleared
+ * in active_mask e.g. x86_pmu_stop()
+ */
+ if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
+ __icl_update_topdown_event(event, slots, metrics,
+ event->hw.saved_slots,
+ event->hw.saved_metric);
+
+ /*
+ * In x86_pmu_stop(), the event is cleared in active_mask first,
+ * then drain the delta, which indicates context switch for
+ * counting.
+ * Save metric and slots for context switch.
+ * Don't need to reset the PERF_METRICS and Fixed counter 3.
+ * Because the values will be restored in next schedule in.
+ */
+ update_saved_topdown_regs(event, slots, metrics);
+ reset = false;
+ }
+
+ if (reset) {
+ /* The fixed counter 3 has to be written before the PERF_METRICS. */
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrl(MSR_PERF_METRICS, 0);
+ if (event)
+ update_saved_topdown_regs(event, 0, 0);
+ }
+
+ return slots;
+}
+
+static void intel_pmu_read_topdown_event(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /* Only need to call update_topdown_event() once for group read. */
+ if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
+ !is_slots_event(event))
+ return;
+
+ perf_pmu_disable(event->pmu);
+ x86_pmu.update_topdown_event(event);
+ perf_pmu_enable(event->pmu);
+}
+
static void intel_pmu_read_event(struct perf_event *event)
{
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_auto_reload_read(event);
+ else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
+ intel_pmu_read_topdown_event(event);
else
x86_perf_event_update(event);
}
@@ -2219,8 +2432,22 @@ static void intel_pmu_read_event(struct perf_event *event)
static void intel_pmu_enable_fixed(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask, bits = 0;
+ int idx = hwc->idx;
+
+ if (is_topdown_idx(idx)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ /*
+ * When there are other active TopDown events,
+ * don't enable the fixed counter 3 again.
+ */
+ if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
+ return;
+
+ idx = INTEL_PMC_IDX_FIXED_SLOTS;
+ }
+
+ intel_set_masks(event, idx);
/*
* Enable IRQ generation (0x8), if not PEBS,
@@ -2240,6 +2467,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
bits |= 0x4;
+ idx -= INTEL_PMC_IDX_FIXED;
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);
@@ -2262,18 +2490,27 @@ static void intel_pmu_enable_event(struct perf_event *event)
if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_enable(event);
- if (idx < INTEL_PMC_IDX_FIXED) {
+ switch (idx) {
+ case 0 ... INTEL_PMC_IDX_FIXED - 1:
intel_set_masks(event, idx);
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
- } else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
- intel_set_masks(event, idx);
+ break;
+ case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_enable_fixed(event);
- } else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
+ break;
+ case INTEL_PMC_IDX_FIXED_BTS:
if (!__this_cpu_read(cpu_hw_events.enabled))
return;
intel_pmu_enable_bts(hwc->config);
- } else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+ break;
+ case INTEL_PMC_IDX_FIXED_VLBR:
intel_set_masks(event, idx);
+ break;
+ default:
+ pr_warn("Failed to enable the event with invalid index %d\n",
+ idx);
+ }
}
static void intel_pmu_add_event(struct perf_event *event)
@@ -2389,7 +2626,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
/*
* PEBS overflow sets bit 62 in the global status register
*/
- if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) {
u64 pebs_enabled = cpuc->pebs_enabled;
handled++;
@@ -2410,7 +2647,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
/*
* Intel PT
*/
- if (__test_and_clear_bit(55, (unsigned long *)&status)) {
+ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
handled++;
if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
perf_guest_cbs->handle_intel_pt_intr))
@@ -2420,6 +2657,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
}
/*
+ * Intel Perf mertrics
+ */
+ if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
+ handled++;
+ if (x86_pmu.update_topdown_event)
+ x86_pmu.update_topdown_event(NULL);
+ }
+
+ /*
* Checkpointed counters can lead to 'spurious' PMIs because the
* rollback caused by the PMI will have cleared the overflow status
* bit. Therefore always force probe these counters.
@@ -3355,6 +3601,56 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (event->attr.type != PERF_TYPE_RAW)
return 0;
+ /*
+ * Config Topdown slots and metric events
+ *
+ * The slots event on Fixed Counter 3 can support sampling,
+ * which will be handled normally in x86_perf_event_update().
+ *
+ * Metric events don't support sampling and require being paired
+ * with a slots event as group leader. When the slots event
+ * is used in a metrics group, it too cannot support sampling.
+ */
+ if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
+ if (event->attr.config1 || event->attr.config2)
+ return -EINVAL;
+
+ /*
+ * The TopDown metrics events and slots event don't
+ * support any filters.
+ */
+ if (event->attr.config & X86_ALL_EVENT_FLAGS)
+ return -EINVAL;
+
+ if (is_metric_event(event)) {
+ struct perf_event *leader = event->group_leader;
+
+ /* The metric events don't support sampling. */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /* The metric events require a slots group leader. */
+ if (!is_slots_event(leader))
+ return -EINVAL;
+
+ /*
+ * The leader/SLOTS must not be a sampling event for
+ * metric use; hardware requires it starts at 0 when used
+ * in conjunction with MSR_PERF_METRICS.
+ */
+ if (is_sampling_event(leader))
+ return -EINVAL;
+
+ event->event_caps |= PERF_EV_CAP_SIBLING;
+ /*
+ * Only once we have a METRICs sibling do we
+ * need TopDown magic.
+ */
+ leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
+ event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
+ }
+ }
+
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
return 0;
@@ -3787,6 +4083,17 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.counter_freezing)
enable_counter_freeze();
+ /* Disable perf metrics if any added CPU doesn't support it. */
+ if (x86_pmu.intel_cap.perf_metrics) {
+ union perf_capabilities perf_cap;
+
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
+ if (!perf_cap.perf_metrics) {
+ x86_pmu.intel_cap.perf_metrics = 0;
+ x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
+ }
+ }
+
if (!cpuc->shared_regs)
return;
@@ -4355,6 +4662,15 @@ static struct attribute *icl_events_attrs[] = {
NULL,
};
+static struct attribute *icl_td_events_attrs[] = {
+ EVENT_PTR(slots),
+ EVENT_PTR(td_retiring),
+ EVENT_PTR(td_bad_spec),
+ EVENT_PTR(td_fe_bound),
+ EVENT_PTR(td_be_bound),
+ NULL,
+};
+
static struct attribute *icl_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_abort),
@@ -4830,6 +5146,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT:
+ case INTEL_FAM6_ATOM_TREMONT_L:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -5139,10 +5456,13 @@ __init int intel_pmu_init(void)
hsw_format_attr : nhm_format_attr;
extra_skl_attr = skl_format_attr;
mem_attr = icl_events_attrs;
+ td_attr = icl_td_events_attrs;
tsx_attr = icl_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
+ x86_pmu.update_topdown_event = icl_update_topdown_event;
+ x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
pr_cont("Icelake events, ");
name = "icelake";
break;
@@ -5198,6 +5518,15 @@ __init int intel_pmu_init(void)
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
+ /*
+ * Don't extend the topdown slots and metrics
+ * events to the generic counters.
+ */
+ if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
+ c->weight = hweight64(c->idxmsk64);
+ continue;
+ }
+
if (c->cmask == FIXED_EVENT_FLAGS
&& c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
@@ -5253,6 +5582,9 @@ __init int intel_pmu_init(void)
if (x86_pmu.counter_freezing)
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
+ if (x86_pmu.intel_cap.perf_metrics)
+ x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
+
return 0;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 86848c57b55e..404315df1e16 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -670,9 +670,7 @@ unlock:
static inline void intel_pmu_drain_pebs_buffer(void)
{
- struct pt_regs regs;
-
- x86_pmu.drain_pebs(&regs);
+ x86_pmu.drain_pebs(NULL);
}
/*
@@ -1737,6 +1735,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
struct x86_perf_regs perf_regs;
struct pt_regs *regs = &perf_regs.regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
+ struct pt_regs dummy_iregs;
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
/*
@@ -1749,6 +1748,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
} else if (!intel_pmu_save_and_restart(event))
return;
+ if (!iregs)
+ iregs = &dummy_iregs;
+
while (count > 1) {
setup_sample(event, iregs, at, &data, regs);
perf_event_output(event, &data, regs);
@@ -1758,16 +1760,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
}
setup_sample(event, iregs, at, &data, regs);
-
- /*
- * All but the last records are processed.
- * The last one is left to be able to call the overflow handler.
- */
- if (perf_event_overflow(event, &data, regs)) {
- x86_pmu_stop(event, 0);
- return;
+ if (iregs == &dummy_iregs) {
+ /*
+ * The PEBS records may be drained in the non-overflow context,
+ * e.g., large PEBS + context switch. Perf should treat the
+ * last record the same as other PEBS records, and doesn't
+ * invoke the generic overflow handler.
+ */
+ perf_event_output(event, &data, regs);
+ } else {
+ /*
+ * All but the last records are processed.
+ * The last one is left to be able to call the overflow handler.
+ */
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
}
-
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d5c6d3b340c5..86d012b3e0b4 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -12,6 +12,8 @@ struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
+/* The PCI driver for the device which the uncore doesn't own. */
+struct pci_driver *uncore_pci_sub_driver;
/* pci bus to socket mapping */
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
@@ -989,65 +991,71 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
}
/*
- * add a pci uncore device
+ * Get the die information of a PCI device.
+ * @pdev: The PCI device.
+ * @phys_id: The physical socket id which the device maps to.
+ * @die: The die id which the device maps to.
*/
-static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
+ int *phys_id, int *die)
{
- struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu = NULL;
- struct intel_uncore_box *box;
- int phys_id, die, ret;
-
- phys_id = uncore_pcibus_to_physid(pdev->bus);
- if (phys_id < 0)
+ *phys_id = uncore_pcibus_to_physid(pdev->bus);
+ if (*phys_id < 0)
return -ENODEV;
- die = (topology_max_die_per_package() > 1) ? phys_id :
- topology_phys_to_logical_pkg(phys_id);
- if (die < 0)
+ *die = (topology_max_die_per_package() > 1) ? *phys_id :
+ topology_phys_to_logical_pkg(*phys_id);
+ if (*die < 0)
return -EINVAL;
- if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
- int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
-
- uncore_extra_pci_dev[die].dev[idx] = pdev;
- pci_set_drvdata(pdev, NULL);
- return 0;
- }
-
- type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+ return 0;
+}
- /*
- * Some platforms, e.g. Knights Landing, use a common PCI device ID
- * for multiple instances of an uncore PMU device type. We should check
- * PCI slot and func to indicate the uncore box.
- */
- if (id->driver_data & ~0xffff) {
- struct pci_driver *pci_drv = pdev->driver;
- const struct pci_device_id *ids = pci_drv->id_table;
- unsigned int devfn;
-
- while (ids && ids->vendor) {
- if ((ids->vendor == pdev->vendor) &&
- (ids->device == pdev->device)) {
- devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
- UNCORE_PCI_DEV_FUNC(ids->driver_data));
- if (devfn == pdev->devfn) {
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
- break;
- }
+/*
+ * Find the PMU of a PCI device.
+ * @pdev: The PCI device.
+ * @ids: The ID table of the available PCI devices with a PMU.
+ */
+static struct intel_uncore_pmu *
+uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
+{
+ struct intel_uncore_pmu *pmu = NULL;
+ struct intel_uncore_type *type;
+ kernel_ulong_t data;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ data = ids->driver_data;
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
+ UNCORE_PCI_DEV_FUNC(data));
+ if (devfn == pdev->devfn) {
+ type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
+ break;
}
- ids++;
}
- if (pmu == NULL)
- return -ENODEV;
- } else {
- /*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
- */
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+ ids++;
}
+ return pmu;
+}
+
+/*
+ * Register the PMU for a PCI device
+ * @pdev: The PCI device.
+ * @type: The corresponding PMU type of the device.
+ * @pmu: The corresponding PMU of the device.
+ * @phys_id: The physical socket id which the device maps to.
+ * @die: The die id which the device maps to.
+ */
+static int uncore_pci_pmu_register(struct pci_dev *pdev,
+ struct intel_uncore_type *type,
+ struct intel_uncore_pmu *pmu,
+ int phys_id, int die)
+{
+ struct intel_uncore_box *box;
+ int ret;
if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
return -EINVAL;
@@ -1067,7 +1075,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
box->pci_dev = pdev;
box->pmu = pmu;
uncore_box_init(box);
- pci_set_drvdata(pdev, box);
pmu->boxes[die] = box;
if (atomic_inc_return(&pmu->activeboxes) > 1)
@@ -1076,7 +1083,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
/* First active box registers the pmu */
ret = uncore_pmu_register(pmu);
if (ret) {
- pci_set_drvdata(pdev, NULL);
pmu->boxes[die] = NULL;
uncore_box_exit(box);
kfree(box);
@@ -1084,18 +1090,87 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
return ret;
}
+/*
+ * add a pci uncore device
+ */
+static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu = NULL;
+ int phys_id, die, ret;
+
+ ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
+ if (ret)
+ return ret;
+
+ if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+ int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
+
+ uncore_extra_pci_dev[die].dev[idx] = pdev;
+ pci_set_drvdata(pdev, NULL);
+ return 0;
+ }
+
+ type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
+ /*
+ * Some platforms, e.g. Knights Landing, use a common PCI device ID
+ * for multiple instances of an uncore PMU device type. We should check
+ * PCI slot and func to indicate the uncore box.
+ */
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+
+ pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+ }
+
+ ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
+
+ pci_set_drvdata(pdev, pmu->boxes[die]);
+
+ return ret;
+}
+
+/*
+ * Unregister the PMU of a PCI device
+ * @pmu: The corresponding PMU is unregistered.
+ * @phys_id: The physical socket id which the device maps to.
+ * @die: The die id which the device maps to.
+ */
+static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu,
+ int phys_id, int die)
+{
+ struct intel_uncore_box *box = pmu->boxes[die];
+
+ if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
+ return;
+
+ pmu->boxes[die] = NULL;
+ if (atomic_dec_return(&pmu->activeboxes) == 0)
+ uncore_pmu_unregister(pmu);
+ uncore_box_exit(box);
+ kfree(box);
+}
+
static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, die;
- phys_id = uncore_pcibus_to_physid(pdev->bus);
+ if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
+ return;
box = pci_get_drvdata(pdev);
if (!box) {
- die = (topology_max_die_per_package() > 1) ? phys_id :
- topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
if (uncore_extra_pci_dev[die].dev[i] == pdev) {
uncore_extra_pci_dev[die].dev[i] = NULL;
@@ -1107,15 +1182,84 @@ static void uncore_pci_remove(struct pci_dev *pdev)
}
pmu = box->pmu;
- if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
- return;
pci_set_drvdata(pdev, NULL);
- pmu->boxes[box->dieid] = NULL;
- if (atomic_dec_return(&pmu->activeboxes) == 0)
- uncore_pmu_unregister(pmu);
- uncore_box_exit(box);
- kfree(box);
+
+ uncore_pci_pmu_unregister(pmu, phys_id, die);
+}
+
+static int uncore_bus_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct intel_uncore_pmu *pmu;
+ int phys_id, die;
+
+ /* Unregister the PMU when the device is going to be deleted. */
+ if (action != BUS_NOTIFY_DEL_DEVICE)
+ return NOTIFY_DONE;
+
+ pmu = uncore_pci_find_dev_pmu(pdev, uncore_pci_sub_driver->id_table);
+ if (!pmu)
+ return NOTIFY_DONE;
+
+ if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
+ return NOTIFY_DONE;
+
+ uncore_pci_pmu_unregister(pmu, phys_id, die);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block uncore_notifier = {
+ .notifier_call = uncore_bus_notify,
+};
+
+static void uncore_pci_sub_driver_init(void)
+{
+ const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu;
+ struct pci_dev *pci_sub_dev;
+ bool notify = false;
+ unsigned int devfn;
+ int phys_id, die;
+
+ while (ids && ids->vendor) {
+ pci_sub_dev = NULL;
+ type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
+ /*
+ * Search the available device, and register the
+ * corresponding PMU.
+ */
+ while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ ids->device, pci_sub_dev))) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn != pci_sub_dev->devfn)
+ continue;
+
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ if (!pmu)
+ continue;
+
+ if (uncore_pci_get_dev_die_info(pci_sub_dev,
+ &phys_id, &die))
+ continue;
+
+ if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
+ phys_id, die))
+ notify = true;
+ }
+ ids++;
+ }
+
+ if (notify && bus_register_notifier(&pci_bus_type, &uncore_notifier))
+ notify = false;
+
+ if (!notify)
+ uncore_pci_sub_driver = NULL;
}
static int __init uncore_pci_init(void)
@@ -1141,6 +1285,9 @@ static int __init uncore_pci_init(void)
if (ret)
goto errtype;
+ if (uncore_pci_sub_driver)
+ uncore_pci_sub_driver_init();
+
pcidrv_registered = true;
return 0;
@@ -1158,6 +1305,8 @@ static void uncore_pci_exit(void)
{
if (pcidrv_registered) {
pcidrv_registered = false;
+ if (uncore_pci_sub_driver)
+ bus_unregister_notifier(&pci_bus_type, &uncore_notifier);
pci_unregister_driver(uncore_pci_driver);
uncore_types_exit(uncore_pci_uncores);
kfree(uncore_extra_pci_dev);
@@ -1478,12 +1627,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
};
static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
- .cpu_init = icl_uncore_cpu_init,
+ .cpu_init = tgl_uncore_cpu_init,
.mmio_init = tgl_uncore_mmio_init,
};
static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
- .cpu_init = icl_uncore_cpu_init,
+ .cpu_init = tgl_uncore_cpu_init,
.mmio_init = tgl_l_uncore_mmio_init,
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 105fdc69825e..83d2a7d490e0 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -552,6 +552,7 @@ extern struct intel_uncore_type **uncore_msr_uncores;
extern struct intel_uncore_type **uncore_pci_uncores;
extern struct intel_uncore_type **uncore_mmio_uncores;
extern struct pci_driver *uncore_pci_driver;
+extern struct pci_driver *uncore_pci_sub_driver;
extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
extern struct pci_extra_dev *uncore_extra_pci_dev;
@@ -567,6 +568,7 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
+void tgl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 6a4ca27b2c9e..39e632ed6ca9 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -126,6 +126,10 @@
#define ICL_UNC_CBO_0_PER_CTR0 0x702
#define ICL_UNC_CBO_MSR_OFFSET 0x8
+/* ICL ARB register */
+#define ICL_UNC_ARB_PER_CTR 0x3b1
+#define ICL_UNC_ARB_PERFEVTSEL 0x3b3
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
@@ -313,15 +317,21 @@ void skl_uncore_cpu_init(void)
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
+static struct intel_uncore_ops icl_uncore_msr_ops = {
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
static struct intel_uncore_type icl_uncore_cbox = {
.name = "cbox",
- .num_counters = 4,
+ .num_counters = 2,
.perf_ctr_bits = 44,
.perf_ctr = ICL_UNC_CBO_0_PER_CTR0,
.event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
.event_mask = SNB_UNC_RAW_EVENT_MASK,
.msr_offset = ICL_UNC_CBO_MSR_OFFSET,
- .ops = &skl_uncore_msr_ops,
+ .ops = &icl_uncore_msr_ops,
.format_group = &snb_uncore_format_group,
};
@@ -350,13 +360,25 @@ static struct intel_uncore_type icl_uncore_clockbox = {
.single_fixed = 1,
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
.format_group = &icl_uncore_clock_format_group,
- .ops = &skl_uncore_msr_ops,
+ .ops = &icl_uncore_msr_ops,
.event_descs = icl_uncore_events,
};
+static struct intel_uncore_type icl_uncore_arb = {
+ .name = "arb",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .perf_ctr = ICL_UNC_ARB_PER_CTR,
+ .event_ctl = ICL_UNC_ARB_PERFEVTSEL,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .ops = &icl_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+};
+
static struct intel_uncore_type *icl_msr_uncores[] = {
&icl_uncore_cbox,
- &snb_uncore_arb,
+ &icl_uncore_arb,
&icl_uncore_clockbox,
NULL,
};
@@ -374,6 +396,21 @@ void icl_uncore_cpu_init(void)
{
uncore_msr_uncores = icl_msr_uncores;
icl_uncore_cbox.num_boxes = icl_get_cbox_num();
+}
+
+static struct intel_uncore_type *tgl_msr_uncores[] = {
+ &icl_uncore_cbox,
+ &snb_uncore_arb,
+ &icl_uncore_clockbox,
+ NULL,
+};
+
+void tgl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = tgl_msr_uncores;
+ icl_uncore_cbox.num_boxes = icl_get_cbox_num();
+ icl_uncore_cbox.ops = &skl_uncore_msr_ops;
+ icl_uncore_clockbox.ops = &skl_uncore_msr_ops;
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 62e88ad919ff..7bdb1821215d 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -393,6 +393,11 @@
#define SNR_M2M_PCI_PMON_BOX_CTL 0x438
#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff
+/* SNR PCIE3 */
+#define SNR_PCIE3_PCI_PMON_CTL0 0x508
+#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8
+#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e0
+
/* SNR IMC */
#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54
#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38
@@ -3749,7 +3754,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
ret = skx_iio_get_topology(type);
if (ret)
- return ret;
+ goto clear_attr_update;
+
+ ret = -ENOMEM;
/* One more for NULL. */
attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL);
@@ -3781,8 +3788,9 @@ err:
kfree(eas);
kfree(attrs);
kfree(type->topology);
+clear_attr_update:
type->attr_update = NULL;
- return -ENOMEM;
+ return ret;
}
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
@@ -4551,12 +4559,46 @@ static struct intel_uncore_type snr_uncore_m2m = {
.format_group = &snr_m2m_uncore_format_group,
};
+static void snr_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base, (u32)(hwc->config | SNBEP_PMON_CTL_EN));
+ pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32));
+}
+
+static struct intel_uncore_ops snr_pcie3_uncore_pci_ops = {
+ .init_box = snr_m2m_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snr_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+static struct intel_uncore_type snr_uncore_pcie3 = {
+ .name = "pcie3",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0,
+ .event_ctl = SNR_PCIE3_PCI_PMON_CTL0,
+ .event_mask = SKX_IIO_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT,
+ .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL,
+ .ops = &snr_pcie3_uncore_pci_ops,
+ .format_group = &skx_uncore_iio_format_group,
+};
+
enum {
SNR_PCI_UNCORE_M2M,
+ SNR_PCI_UNCORE_PCIE3,
};
static struct intel_uncore_type *snr_pci_uncores[] = {
[SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m,
+ [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3,
NULL,
};
@@ -4573,6 +4615,19 @@ static struct pci_driver snr_uncore_pci_driver = {
.id_table = snr_uncore_pci_ids,
};
+static const struct pci_device_id snr_uncore_pci_sub_ids[] = {
+ { /* PCIe3 RP */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a),
+ .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0),
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver snr_uncore_pci_sub_driver = {
+ .name = "snr_uncore_sub",
+ .id_table = snr_uncore_pci_sub_ids,
+};
+
int snr_uncore_pci_init(void)
{
/* SNR UBOX DID */
@@ -4584,6 +4639,7 @@ int snr_uncore_pci_init(void)
uncore_pci_uncores = snr_pci_uncores;
uncore_pci_driver = &snr_uncore_pci_driver;
+ uncore_pci_sub_driver = &snr_uncore_pci_sub_driver;
return 0;
}
@@ -4751,10 +4807,10 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
- INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
- INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
{ /* end: all zeroes */ },
};
@@ -5212,17 +5268,17 @@ static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
- INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
- INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"),
- INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"),
- INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "6.103515625e-5"),
INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"),
{ /* end: all zeroes */ },
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index a949f6f55991..4be8f9cabd07 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -78,6 +78,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_TREMONT_D:
case INTEL_FAM6_ATOM_TREMONT:
+ case INTEL_FAM6_ATOM_TREMONT_L:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 7b68ab5f19e7..ee2b9b9fc2a5 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -79,6 +79,31 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
+#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
+
+static inline bool is_topdown_count(struct perf_event *event)
+{
+ return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
+}
+
+static inline bool is_metric_event(struct perf_event *event)
+{
+ u64 config = event->attr.config;
+
+ return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
+ ((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
+ ((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
+}
+
+static inline bool is_slots_event(struct perf_event *event)
+{
+ return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
+}
+
+static inline bool is_topdown_event(struct perf_event *event)
+{
+ return is_metric_event(event) || is_slots_event(event);
+}
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -210,6 +235,8 @@ struct cpu_hw_events {
they've never been enabled yet */
int n_txn; /* the # last events in the below arrays;
added in the current transaction */
+ int n_txn_pair;
+ int n_txn_metric;
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
u64 tags[X86_PMC_IDX_MAX];
@@ -285,6 +312,12 @@ struct cpu_hw_events {
u64 tfa_shadow;
/*
+ * Perf Metrics
+ */
+ /* number of accepted metrics events */
+ int n_metric;
+
+ /*
* AMD specific bits
*/
struct amd_nb *amd_nb;
@@ -376,6 +409,19 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
/*
+ * The special metric counters do not actually exist. They are calculated from
+ * the combination of the FxCtr3 + MSR_PERF_METRICS.
+ *
+ * The special metric counters are mapped to a dummy offset for the scheduler.
+ * The sharing between multiple users of the same metric without multiplexing
+ * is not allowed, even though the hardware supports that in principle.
+ */
+
+#define METRIC_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)), \
+ INTEL_ARCH_EVENT_MASK)
+
+/*
* Constraint on the Event code + UMask
*/
#define INTEL_UEVENT_CONSTRAINT(c, n) \
@@ -537,7 +583,7 @@ union perf_capabilities {
*/
u64 full_width_write:1;
u64 pebs_baseline:1;
- u64 pebs_metrics_available:1;
+ u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
};
u64 capabilities;
@@ -727,6 +773,12 @@ struct x86_pmu {
atomic_t lbr_exclusive[x86_lbr_exclusive_max];
/*
+ * Intel perf metrics
+ */
+ u64 (*update_topdown_event)(struct perf_event *event);
+ int (*set_topdown_event_period)(struct perf_event *event);
+
+ /*
* perf task context (i.e. struct perf_event_context::task_ctx_data)
* switch helper to bridge calls from perf/core to perf/x86.
* See struct pmu::swap_task_ctx() usage for examples;
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 67b411f7e8c4..7c0120e2e957 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -815,6 +815,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h),
+ X86_MATCH_VENDOR_FAM(AMD, 0x19, &model_amd_fam17h),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 6035df1b49e1..e04d90af4c27 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -148,9 +148,9 @@ static inline bool hv_reenlightenment_available(void)
* Check for required features and priviliges to make TSC frequency
* change notifications work.
*/
- return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ return ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE &&
- ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT;
+ ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT;
}
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment)
@@ -330,8 +330,8 @@ void __init hyperv_init(void)
return;
/* Absolutely required MSRs */
- required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE |
- HV_X64_MSR_VP_INDEX_AVAILABLE;
+ required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
+ HV_MSR_VP_INDEX_AVAILABLE;
if ((ms_hyperv.features & required_msrs) != required_msrs)
return;
diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c
index 07f21a06392f..f3270c1fc48c 100644
--- a/arch/x86/hyperv/hv_spinlock.c
+++ b/arch/x86/hyperv/hv_spinlock.c
@@ -66,7 +66,7 @@ void __init hv_init_spinlocks(void)
{
if (!hv_pvspin || !apic ||
!(ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) ||
- !(ms_hyperv.features & HV_X64_MSR_GUEST_IDLE_AVAILABLE)) {
+ !(ms_hyperv.features & HV_MSR_GUEST_IDLE_AVAILABLE)) {
pr_info("PV spinlocks disabled\n");
return;
}
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index ca8a657edf59..a09fc37ead9d 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -239,7 +239,6 @@ beyond_if:
(regs)->ss = __USER32_DS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 =
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
- set_fs(USER_DS);
return 0;
}
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index ca0976456a6b..6d2df1ee427b 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -159,8 +159,6 @@ static inline u64 x86_default_get_root_pointer(void)
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
-#define acpi_unlazy_tlb(x) leave_mm(x)
-
#ifdef CONFIG_ACPI_APEI
static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
{
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2cc44e957c31..4e3099d9ae62 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -374,12 +374,12 @@ extern struct apic *apic;
#define apic_driver(sym) \
static const struct apic *__apicdrivers_##sym __used \
__aligned(sizeof(struct apic *)) \
- __section(.apicdrivers) = { &sym }
+ __section(".apicdrivers") = { &sym }
#define apic_drivers(sym1, sym2) \
static struct apic *__apicdrivers_##sym1##sym2[2] __used \
__aligned(sizeof(struct apic *)) \
- __section(.apicdrivers) = { &sym1, &sym2 }
+ __section(".apicdrivers") = { &sym1, &sym2 }
extern struct apic *__apicdrivers[], *__apicdrivers_end[];
@@ -519,6 +519,14 @@ static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
static inline void apic_smt_update(void) { }
#endif
+struct msi_msg;
+
+#ifdef CONFIG_PCI_MSI
+void x86_vector_msi_compose_msg(struct irq_data *data, struct msi_msg *msg);
+#else
+# define x86_vector_msi_compose_msg NULL
+#endif
+
extern void ioapic_zap_locks(void);
#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 5a42f9206138..51e2bf27cc9b 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -5,6 +5,7 @@
#include <asm/string.h>
#include <asm/page.h>
#include <asm/checksum.h>
+#include <asm/mce.h>
#include <asm-generic/asm-prototypes.h>
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 5c15f95b1ba7..0603c7423aca 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -135,14 +135,21 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
-# define _ASM_NOKPROBE(entry) \
+# ifdef CONFIG_KPROBES
+# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
_ASM_ALIGN ; \
_ASM_PTR (entry); \
.popsection
+# else
+# define _ASM_NOKPROBE(entry)
+# endif
#else /* ! __ASSEMBLY__ */
# define _EXPAND_EXTABLE_HANDLE(x) #x
@@ -160,6 +167,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index abe08690a887..69404eae9983 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -8,7 +8,7 @@
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#define __read_mostly __section(".data..read_mostly")
#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT
#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT)
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index 0ada98d5d09f..bca625a60186 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
#define HAVE_CSUM_COPY_USER
+#define _HAVE_ARCH_CSUM_AND_COPY
#ifdef CONFIG_X86_32
# include <asm/checksum_32.h>
#else
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 11624c8a9d8d..17da95387997 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -27,9 +27,7 @@ asmlinkage __wsum csum_partial(const void *buff, int len, __wsum sum);
* better 64-bit) boundary
*/
-asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
- int len, __wsum sum,
- int *src_err_ptr, int *dst_err_ptr);
+asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
/*
* Note: when you get a NULL pointer exception here this means someone
@@ -38,26 +36,20 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
* If you use these functions directly please don't forget the
* access_ok().
*/
-static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst,
- int len, __wsum sum)
+static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
{
- return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+ return csum_partial_copy_generic(src, dst, len);
}
static inline __wsum csum_and_copy_from_user(const void __user *src,
- void *dst, int len,
- __wsum sum, int *err_ptr)
+ void *dst, int len)
{
__wsum ret;
might_sleep();
- if (!user_access_begin(src, len)) {
- if (len)
- *err_ptr = -EFAULT;
- return sum;
- }
- ret = csum_partial_copy_generic((__force void *)src, dst,
- len, sum, err_ptr, NULL);
+ if (!user_access_begin(src, len))
+ return 0;
+ ret = csum_partial_copy_generic((__force void *)src, dst, len);
user_access_end();
return ret;
@@ -178,23 +170,17 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
*/
static inline __wsum csum_and_copy_to_user(const void *src,
void __user *dst,
- int len, __wsum sum,
- int *err_ptr)
+ int len)
{
__wsum ret;
might_sleep();
- if (user_access_begin(dst, len)) {
- ret = csum_partial_copy_generic(src, (__force void *)dst,
- len, sum, NULL, err_ptr);
- user_access_end();
- return ret;
- }
+ if (!user_access_begin(dst, len))
+ return 0;
- if (len)
- *err_ptr = -EFAULT;
-
- return (__force __wsum)-1; /* invalid checksum */
+ ret = csum_partial_copy_generic(src, (__force void *)dst, len);
+ user_access_end();
+ return ret;
}
#endif /* _ASM_X86_CHECKSUM_32_H */
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 0a289b87e872..407beebadaf4 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -130,17 +130,11 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
/* Do not call this directly. Use the wrappers below */
-extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
- int len, __wsum sum,
- int *src_err_ptr, int *dst_err_ptr);
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
-
-extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
- int len, __wsum isum, int *errp);
-extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
- int len, __wsum isum, int *errp);
-extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
- int len, __wsum sum);
+extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len);
+extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len);
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
/**
* ip_compute_csum - Compute an 16bit IP checksum.
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index d4edf281fff4..0e327a01f50f 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -27,8 +27,6 @@ typedef u16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
-typedef s64 __attribute__((aligned(4))) compat_s64;
-typedef u64 __attribute__((aligned(4))) compat_u64;
struct compat_stat {
compat_dev_t st_dev;
@@ -211,6 +209,7 @@ static inline bool in_compat_syscall(void)
return in_32bit_syscall();
}
#define in_compat_syscall in_compat_syscall /* override the generic impl */
+#define compat_need_64bit_alignment_fixup in_ia32_syscall
#endif
struct compat_siginfo;
diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
new file mode 100644
index 000000000000..e4991ba96726
--- /dev/null
+++ b/arch/x86/include/asm/copy_mc_test.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _COPY_MC_TEST_H_
+#define _COPY_MC_TEST_H_
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_COPY_MC_TEST
+extern unsigned long copy_mc_test_src;
+extern unsigned long copy_mc_test_dst;
+
+static inline void copy_mc_inject_src(void *addr)
+{
+ if (addr)
+ copy_mc_test_src = (unsigned long) addr;
+ else
+ copy_mc_test_src = ~0UL;
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+ if (addr)
+ copy_mc_test_dst = (unsigned long) addr;
+ else
+ copy_mc_test_dst = ~0UL;
+}
+#else /* CONFIG_COPY_MC_TEST */
+static inline void copy_mc_inject_src(void *addr)
+{
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+}
+#endif /* CONFIG_COPY_MC_TEST */
+
+#else /* __ASSEMBLY__ */
+#include <asm/export.h>
+
+#ifdef CONFIG_COPY_MC_TEST
+.macro COPY_MC_TEST_CTL
+ .pushsection .data
+ .align 8
+ .globl copy_mc_test_src
+ copy_mc_test_src:
+ .quad 0
+ EXPORT_SYMBOL_GPL(copy_mc_test_src)
+ .globl copy_mc_test_dst
+ copy_mc_test_dst:
+ .quad 0
+ EXPORT_SYMBOL_GPL(copy_mc_test_dst)
+ .popsection
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+ leaq \count(\reg), %r9
+ cmp copy_mc_test_src, %r9
+ ja \target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+ leaq \count(\reg), %r9
+ cmp copy_mc_test_dst, %r9
+ ja \target
+.endm
+#else
+.macro COPY_MC_TEST_CTL
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+.endm
+#endif /* CONFIG_COPY_MC_TEST */
+#endif /* __ASSEMBLY__ */
+#endif /* _COPY_MC_TEST_H_ */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 8902fdb7de13..3d52b094850a 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -11,25 +11,29 @@
#ifdef CONFIG_X86_64
/* Macro to enforce the same ordering and stack sizes */
-#define ESTACKS_MEMBERS(guardsize) \
- char DF_stack_guard[guardsize]; \
- char DF_stack[EXCEPTION_STKSZ]; \
- char NMI_stack_guard[guardsize]; \
- char NMI_stack[EXCEPTION_STKSZ]; \
- char DB_stack_guard[guardsize]; \
- char DB_stack[EXCEPTION_STKSZ]; \
- char MCE_stack_guard[guardsize]; \
- char MCE_stack[EXCEPTION_STKSZ]; \
- char IST_top_guard[guardsize]; \
+#define ESTACKS_MEMBERS(guardsize, optional_stack_size) \
+ char DF_stack_guard[guardsize]; \
+ char DF_stack[EXCEPTION_STKSZ]; \
+ char NMI_stack_guard[guardsize]; \
+ char NMI_stack[EXCEPTION_STKSZ]; \
+ char DB_stack_guard[guardsize]; \
+ char DB_stack[EXCEPTION_STKSZ]; \
+ char MCE_stack_guard[guardsize]; \
+ char MCE_stack[EXCEPTION_STKSZ]; \
+ char VC_stack_guard[guardsize]; \
+ char VC_stack[optional_stack_size]; \
+ char VC2_stack_guard[guardsize]; \
+ char VC2_stack[optional_stack_size]; \
+ char IST_top_guard[guardsize]; \
/* The exception stacks' physical storage. No guard pages required */
struct exception_stacks {
- ESTACKS_MEMBERS(0)
+ ESTACKS_MEMBERS(0, 0)
};
/* The effective cpu entry area mapping with guard pages. */
struct cea_exception_stacks {
- ESTACKS_MEMBERS(PAGE_SIZE)
+ ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
};
/*
@@ -40,6 +44,8 @@ enum exception_stack_ordering {
ESTACK_NMI,
ESTACK_DB,
ESTACK_MCE,
+ ESTACK_VC,
+ ESTACK_VC2,
N_EXCEPTION_STACKS
};
@@ -139,4 +145,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
#define __this_cpu_ist_top_va(name) \
CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
+#define __this_cpu_ist_bottom_va(name) \
+ CEA_ESTACK_BOT(__this_cpu_read(cea_exception_stacks), name)
+
#endif
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..dad350d42ecf 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-/* free ( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -236,6 +236,7 @@
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -288,6 +289,7 @@
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -353,6 +355,7 @@
#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
@@ -368,6 +371,7 @@
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index e89558a3fe4a..cfdf307ddc01 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -90,8 +90,6 @@ static __always_inline bool hw_breakpoint_active(void)
return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
}
-extern void aout_dump_debugregs(struct user *dump);
-
extern void hw_breakpoint_restore(void);
static __always_inline unsigned long local_db_save(void)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1ced11d31932..476082a83d1c 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -383,6 +383,33 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
void alloc_intr_gate(unsigned int n, const void *addr);
+static inline void init_idt_data(struct idt_data *data, unsigned int n,
+ const void *addr)
+{
+ BUG_ON(n > 0xFF);
+
+ memset(data, 0, sizeof(*data));
+ data->vector = n;
+ data->addr = addr;
+ data->segment = __KERNEL_CS;
+ data->bits.type = GATE_INTERRUPT;
+ data->bits.p = 1;
+}
+
+static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
+{
+ unsigned long addr = (unsigned long) d->addr;
+
+ gate->offset_low = (u16) addr;
+ gate->segment = (u16) d->segment;
+ gate->bits = d->bits;
+ gate->offset_middle = (u16) (addr >> 16);
+#ifdef CONFIG_X86_64
+ gate->offset_high = (u32) (addr >> 32);
+ gate->reserved = 0;
+#endif
+}
+
extern unsigned long system_vectors[];
extern void load_current_idt(void);
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index a91f3b6e4f2a..f7e7099af595 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -74,6 +74,13 @@ struct idt_bits {
p : 1;
} __attribute__((packed));
+struct idt_data {
+ unsigned int vector;
+ unsigned int segment;
+ struct idt_bits bits;
+ const void *addr;
+};
+
struct gate_struct {
u16 offset_low;
u16 segment;
@@ -109,6 +116,9 @@ struct desc_ptr {
#endif /* !__ASSEMBLY__ */
+/* Boot IDT definitions */
+#define BOOT_IDT_ENTRIES 32
+
/* Access rights as returned by LAR */
#define AR_TYPE_RODATA (0 * (1 << 9))
#define AR_TYPE_RWDATA (1 * (1 << 9))
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 4ea8584682f9..5861d34f9771 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,6 +56,12 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_IOMMU_SUPPORT
+# define DISABLE_ENQCMD 0
+#else
+# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -75,7 +81,8 @@
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
+#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
+ DISABLE_ENQCMD)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index fed67eafcacc..bb1654fe0ce7 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -8,10 +8,8 @@
*/
#include <linux/scatterlist.h>
-#include <linux/dma-debug.h>
#include <asm/io.h>
#include <asm/swiotlb.h>
-#include <linux/dma-contiguous.h>
extern int iommu_merge;
extern int panic_on_overflow;
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index d8c2198d543b..1f0cbc52937c 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,10 +29,17 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
+enum handler_type {
+ EX_HANDLER_NONE,
+ EX_HANDLER_FAULT,
+ EX_HANDLER_UACCESS,
+ EX_HANDLER_OTHER
+};
+
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
-extern bool ex_has_fault_handler(unsigned long ip);
+extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0f0dd645b594..77217bd292bd 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -99,7 +99,7 @@ enum fixed_addresses {
FIX_PCIE_MCFG,
#endif
#endif
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
FIX_PARAVIRT_BOOTMAP,
#endif
#ifdef CONFIG_X86_INTEL_MID
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b774c52e5411..dcd9503b1098 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -62,4 +62,16 @@ extern void switch_fpu_return(void);
*/
extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
+/*
+ * Tasks that are not using SVA have mm->pasid set to zero to note that they
+ * will not have the valid bit set in MSR_IA32_PASID while they are running.
+ */
+#define PASID_DISABLED 0
+
+#ifdef CONFIG_IOMMU_SUPPORT
+/* Update current's PASID MSR/state by mm's PASID. */
+void update_pasid(void);
+#else
+static inline void update_pasid(void) { }
+#endif
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0a460f2a3f90..8d33ad80704f 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -19,6 +19,7 @@
#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
+#include <asm/fpu/xcr.h>
#include <asm/cpufeature.h>
#include <asm/trace/fpu.h>
@@ -583,38 +584,13 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
pkru_val = pk->pkru;
}
__write_pkru(pkru_val);
-}
-
-/*
- * MXCSR and XCR definitions:
- */
-
-static inline void ldmxcsr(u32 mxcsr)
-{
- asm volatile("ldmxcsr %0" :: "m" (mxcsr));
-}
-
-extern unsigned int mxcsr_feature_mask;
-
-#define XCR_XFEATURE_ENABLED_MASK 0x00000000
-static inline u64 xgetbv(u32 index)
-{
- u32 eax, edx;
-
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
- : : "a" (eax), "d" (edx), "c" (index));
+ /*
+ * Expensive PASID MSR write will be avoided in update_pasid() because
+ * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
+ * unless it's different from mm->pasid to reduce overhead.
+ */
+ update_pasid();
}
#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index c87364ea6446..f5a38a5f3ae1 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -114,7 +114,7 @@ enum xfeature {
XFEATURE_Hi16_ZMM,
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
XFEATURE_PKRU,
- XFEATURE_RSRVD_COMP_10,
+ XFEATURE_PASID,
XFEATURE_RSRVD_COMP_11,
XFEATURE_RSRVD_COMP_12,
XFEATURE_RSRVD_COMP_13,
@@ -134,6 +134,7 @@ enum xfeature {
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID)
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
@@ -256,6 +257,14 @@ struct arch_lbr_state {
struct lbr_entry entries[];
} __packed;
+/*
+ * State component 10 is supervisor state used for context-switching the
+ * PASID state.
+ */
+struct ia32_pasid_state {
+ u64 pasid;
+} __packed;
+
struct xstate_header {
u64 xfeatures;
u64 xcomp_bv;
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
new file mode 100644
index 000000000000..1c7ab8d95da5
--- /dev/null
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_FPU_XCR_H
+#define _ASM_X86_FPU_XCR_H
+
+/*
+ * MXCSR and XCR definitions:
+ */
+
+static inline void ldmxcsr(u32 mxcsr)
+{
+ asm volatile("ldmxcsr %0" :: "m" (mxcsr));
+}
+
+extern unsigned int mxcsr_feature_mask;
+
+#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
+ return eax + ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+#endif /* _ASM_X86_FPU_XCR_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 14ab815132d4..47a92232d595 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -35,7 +35,7 @@
XFEATURE_MASK_BNDCSR)
/* All currently supported supervisor features */
-#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
/*
* A supervisor state component may not always contain valuable information,
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 296b346184b2..fb42659f6e98 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -60,12 +60,26 @@
#define FRAME_END "pop %" _ASM_BP "\n"
#ifdef CONFIG_X86_64
+
#define ENCODE_FRAME_POINTER \
"lea 1(%rsp), %rbp\n\t"
+
+static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
+{
+ return (unsigned long)regs + 1;
+}
+
#else /* !CONFIG_X86_64 */
+
#define ENCODE_FRAME_POINTER \
"movl %esp, %ebp\n\t" \
"andl $0x7fffffff, %ebp\n\t"
+
+static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
+{
+ return (unsigned long)regs & 0x7fffffff;
+}
+
#endif /* CONFIG_X86_64 */
#endif /* __ASSEMBLY__ */
@@ -83,6 +97,11 @@
#define ENCODE_FRAME_POINTER
+static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
+{
+ return 0;
+}
+
#endif
#define FRAME_BEGIN
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index d552646411a9..35cff5f2becf 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -57,7 +57,7 @@ static inline unsigned long x86_fsbase_read_cpu(void)
{
unsigned long fsbase;
- if (static_cpu_has(X86_FEATURE_FSGSBASE))
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE))
fsbase = rdfsbase();
else
rdmsrl(MSR_FS_BASE, fsbase);
@@ -67,7 +67,7 @@ static inline unsigned long x86_fsbase_read_cpu(void)
static inline void x86_fsbase_write_cpu(unsigned long fsbase)
{
- if (static_cpu_has(X86_FEATURE_FSGSBASE))
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE))
wrfsbase(fsbase);
else
wrmsrl(MSR_FS_BASE, fsbase);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 74c12437401e..a4aeeaace040 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -36,61 +36,56 @@ struct msi_desc;
enum irq_alloc_type {
X86_IRQ_ALLOC_TYPE_IOAPIC = 1,
X86_IRQ_ALLOC_TYPE_HPET,
- X86_IRQ_ALLOC_TYPE_MSI,
- X86_IRQ_ALLOC_TYPE_MSIX,
+ X86_IRQ_ALLOC_TYPE_PCI_MSI,
+ X86_IRQ_ALLOC_TYPE_PCI_MSIX,
X86_IRQ_ALLOC_TYPE_DMAR,
X86_IRQ_ALLOC_TYPE_UV,
+ X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT,
+ X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT,
};
+struct ioapic_alloc_info {
+ int pin;
+ int node;
+ u32 trigger : 1;
+ u32 polarity : 1;
+ u32 valid : 1;
+ struct IO_APIC_route_entry *entry;
+};
+
+struct uv_alloc_info {
+ int limit;
+ int blade;
+ unsigned long offset;
+ char *name;
+
+};
+
+/**
+ * irq_alloc_info - X86 specific interrupt allocation info
+ * @type: X86 specific allocation type
+ * @flags: Flags for allocation tweaks
+ * @devid: Device ID for allocations
+ * @hwirq: Associated hw interrupt number in the domain
+ * @mask: CPU mask for vector allocation
+ * @desc: Pointer to msi descriptor
+ * @data: Allocation specific data
+ *
+ * @ioapic: IOAPIC specific allocation data
+ * @uv: UV specific allocation data
+*/
struct irq_alloc_info {
enum irq_alloc_type type;
u32 flags;
- const struct cpumask *mask; /* CPU mask for vector allocation */
+ u32 devid;
+ irq_hw_number_t hwirq;
+ const struct cpumask *mask;
+ struct msi_desc *desc;
+ void *data;
+
union {
- int unused;
-#ifdef CONFIG_HPET_TIMER
- struct {
- int hpet_id;
- int hpet_index;
- void *hpet_data;
- };
-#endif
-#ifdef CONFIG_PCI_MSI
- struct {
- struct pci_dev *msi_dev;
- irq_hw_number_t msi_hwirq;
- };
-#endif
-#ifdef CONFIG_X86_IO_APIC
- struct {
- int ioapic_id;
- int ioapic_pin;
- int ioapic_node;
- u32 ioapic_trigger : 1;
- u32 ioapic_polarity : 1;
- u32 ioapic_valid : 1;
- struct IO_APIC_route_entry *ioapic_entry;
- };
-#endif
-#ifdef CONFIG_DMAR_TABLE
- struct {
- int dmar_id;
- void *dmar_data;
- };
-#endif
-#ifdef CONFIG_X86_UV
- struct {
- int uv_limit;
- int uv_blade;
- unsigned long uv_offset;
- char *uv_name;
- };
-#endif
-#if IS_ENABLED(CONFIG_VMD)
- struct {
- struct msi_desc *desc;
- };
-#endif
+ struct ioapic_alloc_info ioapic;
+ struct uv_alloc_info uv;
};
};
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 7a4d2062385c..0ed20e8bba9e 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -28,39 +28,6 @@
#define HYPERV_CPUID_MAX 0x4000ffff
/*
- * Aliases for Group A features that have X64 in the name.
- * On x86/x64 these are HYPERV_CPUID_FEATURES.EAX bits.
- */
-
-#define HV_X64_MSR_VP_RUNTIME_AVAILABLE \
- HV_MSR_VP_RUNTIME_AVAILABLE
-#define HV_X64_MSR_SYNIC_AVAILABLE \
- HV_MSR_SYNIC_AVAILABLE
-#define HV_X64_MSR_APIC_ACCESS_AVAILABLE \
- HV_MSR_APIC_ACCESS_AVAILABLE
-#define HV_X64_MSR_HYPERCALL_AVAILABLE \
- HV_MSR_HYPERCALL_AVAILABLE
-#define HV_X64_MSR_VP_INDEX_AVAILABLE \
- HV_MSR_VP_INDEX_AVAILABLE
-#define HV_X64_MSR_RESET_AVAILABLE \
- HV_MSR_RESET_AVAILABLE
-#define HV_X64_MSR_GUEST_IDLE_AVAILABLE \
- HV_MSR_GUEST_IDLE_AVAILABLE
-#define HV_X64_ACCESS_FREQUENCY_MSRS \
- HV_ACCESS_FREQUENCY_MSRS
-#define HV_X64_ACCESS_REENLIGHTENMENT \
- HV_ACCESS_REENLIGHTENMENT
-#define HV_X64_ACCESS_TSC_INVARIANT \
- HV_ACCESS_TSC_INVARIANT
-
-/*
- * Aliases for Group B features that have X64 in the name.
- * On x86/x64 these are HYPERV_CPUID_FEATURES.EBX bits.
- */
-#define HV_X64_POST_MESSAGES HV_POST_MESSAGES
-#define HV_X64_SIGNAL_EVENTS HV_SIGNAL_EVENTS
-
-/*
* Group D Features. The bit assignments are custom to each architecture.
* On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits.
*/
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index a43366191212..b2442eb0ac2f 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -242,7 +242,7 @@ __visible noinstr void func(struct pt_regs *regs) \
instrumentation_begin(); \
irq_enter_rcu(); \
kvm_set_cpu_l1tf_flush_l1d(); \
- run_on_irqstack_cond(__##func, regs, regs); \
+ run_sysvec_on_irqstack_cond(__##func, regs); \
irq_exit_rcu(); \
instrumentation_end(); \
irqentry_exit(regs, state); \
@@ -309,6 +309,19 @@ static __always_inline void __##func(struct pt_regs *regs)
__visible void noist_##func(struct pt_regs *regs)
/**
+ * DECLARE_IDTENTRY_VC - Declare functions for the VC entry point
+ * @vector: Vector number (ignored for C)
+ * @func: Function name of the entry point
+ *
+ * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE, but declares also the
+ * safe_stack C handler.
+ */
+#define DECLARE_IDTENTRY_VC(vector, func) \
+ DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
+ __visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \
+ __visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
+
+/**
* DEFINE_IDTENTRY_IST - Emit code for IST entry points
* @func: Function name of the entry point
*
@@ -347,6 +360,35 @@ static __always_inline void __##func(struct pt_regs *regs)
#define DEFINE_IDTENTRY_DF(func) \
DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+/**
+ * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
+ which runs on a safe stack.
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
+
+/**
+ * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
+ which runs on the VC fall-back stack
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC_IST(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
+
+/**
+ * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
+ * @func: Function name of the entry point
+ *
+ * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
+ */
+#define DEFINE_IDTENTRY_VC(func) \
+ DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+
#else /* CONFIG_X86_64 */
/**
@@ -433,6 +475,9 @@ __visible noinstr void func(struct pt_regs *regs, \
# define DECLARE_IDTENTRY_XENCB(vector, func) \
DECLARE_IDTENTRY(vector, func)
+# define DECLARE_IDTENTRY_VC(vector, func) \
+ idtentry_vc vector asm_##func func
+
#else
# define DECLARE_IDTENTRY_MCE(vector, func) \
DECLARE_IDTENTRY(vector, func)
@@ -547,7 +592,7 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check);
/* NMI */
DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
-#if defined(CONFIG_XEN_PV) && defined(CONFIG_X86_64)
+#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
#endif
@@ -557,13 +602,18 @@ DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug);
#else
DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug);
#endif
-#if defined(CONFIG_XEN_PV) && defined(CONFIG_X86_64)
+#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug);
#endif
/* #DF */
DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault);
+/* #VC */
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication);
+#endif
+
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback);
#endif
@@ -591,10 +641,6 @@ DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
-# ifdef CONFIG_X86_UV
-DECLARE_IDTENTRY_SYSVEC(UV_BAU_MESSAGE, sysvec_uv_bau_message);
-# endif
-
# ifdef CONFIG_X86_MCE_THRESHOLD
DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold);
# endif
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 2b6ccf2c49f1..a0f839aa144d 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -15,9 +15,15 @@
#define INSN_CODE_SEG_OPND_SZ(params) (params & 0xf)
#define INSN_CODE_SEG_PARAMS(oper_sz, addr_sz) (oper_sz | (addr_sz << 4))
+bool insn_has_rep_prefix(struct insn *insn);
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
+int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs);
+int insn_fetch_from_user(struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE]);
+bool insn_decode(struct insn *insn, struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE], int buf_size);
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index de58391bdee0..cf0e25f45422 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -43,7 +43,7 @@ struct devs_id {
#define sfi_device(i) \
static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \
- __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i
+ __section(".x86_intel_mid_dev.init") = &i
/**
* struct mid_sd_board_info - template for SD device creation
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e1aa17a468a8..d726459d08e5 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -401,7 +401,7 @@ extern bool phys_mem_access_encrypted(unsigned long phys_addr,
/**
* iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
- * @__dst: destination, in MMIO space (must be 512-bit aligned)
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
* @src: source
* @count: number of 512 bits quantities to submit
*
@@ -412,25 +412,14 @@ extern bool phys_mem_access_encrypted(unsigned long phys_addr,
* Warning: Do not use this helper unless your driver has checked that the CPU
* instruction is supported on the platform.
*/
-static inline void iosubmit_cmds512(void __iomem *__dst, const void *src,
+static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
size_t count)
{
- /*
- * Note that this isn't an "on-stack copy", just definition of "dst"
- * as a pointer to 64-bytes of stuff that is going to be overwritten.
- * In the MOVDIR64B case that may be needed as you can use the
- * MOVDIR64B instruction to copy arbitrary memory around. This trick
- * lets the compiler know how much gets clobbered.
- */
- volatile struct { char _[64]; } *dst = __dst;
const u8 *from = src;
const u8 *end = from + count * 64;
while (from < end) {
- /* MOVDIR64B [rdx], rax */
- asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
- : "=m" (dst)
- : "d" (from), "a" (dst));
+ movdir64b(dst, from);
from += 64;
}
}
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 4bc985f1e2e4..af4a151d70b3 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -45,8 +45,6 @@ extern int irq_remap_enable_fault_handling(void);
extern void panic_if_irq_remap(const char *msg);
extern struct irq_domain *
-irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info);
-extern struct irq_domain *
irq_remapping_get_irq_domain(struct irq_alloc_info *info);
/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
@@ -74,12 +72,6 @@ static inline void panic_if_irq_remap(const char *msg)
}
static inline struct irq_domain *
-irq_remapping_get_ir_irq_domain(struct irq_alloc_info *info)
-{
- return NULL;
-}
-
-static inline struct irq_domain *
irq_remapping_get_irq_domain(struct irq_alloc_info *info)
{
return NULL;
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 4ae66f097101..775816965c6a 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -12,20 +12,50 @@ static __always_inline bool irqstack_active(void)
return __this_cpu_read(irq_count) != -1;
}
-void asm_call_on_stack(void *sp, void *func, void *arg);
+void asm_call_on_stack(void *sp, void (*func)(void), void *arg);
+void asm_call_sysvec_on_stack(void *sp, void (*func)(struct pt_regs *regs),
+ struct pt_regs *regs);
+void asm_call_irq_on_stack(void *sp, void (*func)(struct irq_desc *desc),
+ struct irq_desc *desc);
-static __always_inline void __run_on_irqstack(void *func, void *arg)
+static __always_inline void __run_on_irqstack(void (*func)(void))
{
void *tos = __this_cpu_read(hardirq_stack_ptr);
__this_cpu_add(irq_count, 1);
- asm_call_on_stack(tos - 8, func, arg);
+ asm_call_on_stack(tos - 8, func, NULL);
+ __this_cpu_sub(irq_count, 1);
+}
+
+static __always_inline void
+__run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs),
+ struct pt_regs *regs)
+{
+ void *tos = __this_cpu_read(hardirq_stack_ptr);
+
+ __this_cpu_add(irq_count, 1);
+ asm_call_sysvec_on_stack(tos - 8, func, regs);
+ __this_cpu_sub(irq_count, 1);
+}
+
+static __always_inline void
+__run_irq_on_irqstack(void (*func)(struct irq_desc *desc),
+ struct irq_desc *desc)
+{
+ void *tos = __this_cpu_read(hardirq_stack_ptr);
+
+ __this_cpu_add(irq_count, 1);
+ asm_call_irq_on_stack(tos - 8, func, desc);
__this_cpu_sub(irq_count, 1);
}
#else /* CONFIG_X86_64 */
static inline bool irqstack_active(void) { return false; }
-static inline void __run_on_irqstack(void *func, void *arg) { }
+static inline void __run_on_irqstack(void (*func)(void)) { }
+static inline void __run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs),
+ struct pt_regs *regs) { }
+static inline void __run_irq_on_irqstack(void (*func)(struct irq_desc *desc),
+ struct irq_desc *desc) { }
#endif /* !CONFIG_X86_64 */
static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
@@ -37,17 +67,40 @@ static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
return !user_mode(regs) && !irqstack_active();
}
-static __always_inline void run_on_irqstack_cond(void *func, void *arg,
+
+static __always_inline void run_on_irqstack_cond(void (*func)(void),
struct pt_regs *regs)
{
- void (*__func)(void *arg) = func;
+ lockdep_assert_irqs_disabled();
+
+ if (irq_needs_irq_stack(regs))
+ __run_on_irqstack(func);
+ else
+ func();
+}
+
+static __always_inline void
+run_sysvec_on_irqstack_cond(void (*func)(struct pt_regs *regs),
+ struct pt_regs *regs)
+{
+ lockdep_assert_irqs_disabled();
+ if (irq_needs_irq_stack(regs))
+ __run_sysvec_on_irqstack(func, regs);
+ else
+ func(regs);
+}
+
+static __always_inline void
+run_irq_on_irqstack_cond(void (*func)(struct irq_desc *desc), struct irq_desc *desc,
+ struct pt_regs *regs)
+{
lockdep_assert_irqs_disabled();
if (irq_needs_irq_stack(regs))
- __run_on_irqstack(__func, arg);
+ __run_irq_on_irqstack(func, desc);
else
- __func(arg);
+ func(desc);
}
#endif
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index c066ffae222b..cd684d45cb5f 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -51,9 +51,13 @@ extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
#endif /* CONFIG_X86_IO_APIC */
#ifdef CONFIG_PCI_MSI
-extern void arch_init_msi_domain(struct irq_domain *domain);
+void x86_create_pci_msi_domain(void);
+struct irq_domain *native_create_pci_msi_domain(void);
+extern struct irq_domain *x86_pci_msi_default_domain;
#else
-static inline void arch_init_msi_domain(struct irq_domain *domain) { }
+static inline void x86_create_pci_msi_domain(void) { }
+#define native_create_pci_msi_domain NULL
+#define x86_pci_msi_default_domain NULL
#endif
#endif
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 02a0cf547d7b..2dfc8d380dab 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -9,7 +9,7 @@
#include <asm/nospec-branch.h>
/* Provide __cpuidle; we can't safely include <linux/cpu.h> */
-#define __cpuidle __attribute__((__section__(".cpuidle.text")))
+#define __cpuidle __section(".cpuidle.text")
/*
* Interrupt control:
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 143bc9abe99c..991a7ad540c7 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -106,5 +106,9 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_int3_handler(struct pt_regs *regs);
extern int kprobe_debug_handler(struct pt_regs *regs);
+#else
+
+static inline int kprobe_debug_handler(struct pt_regs *regs) { return 0; }
+
#endif /* CONFIG_KPROBES */
#endif /* _ASM_X86_KPROBES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5303dbc5c9bc..d44858b69353 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -80,13 +80,14 @@
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
-#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24)
#define KVM_REQ_APICV_UPDATE \
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
#define KVM_REQ_HV_TLB_FLUSH \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
+#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -132,7 +133,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 80
+#define KVM_MAX_CPUID_ENTRIES 256
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
@@ -636,7 +637,7 @@ struct kvm_vcpu_arch {
int halt_request; /* real mode on Intel only */
int cpuid_nent;
- struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+ struct kvm_cpuid_entry2 *cpuid_entries;
int maxphyaddr;
int max_tdp_level;
@@ -788,6 +789,21 @@ struct kvm_vcpu_arch {
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
+
+ /* pv related cpuid info */
+ struct {
+ /*
+ * value of the eax register in the KVM_CPUID_FEATURES CPUID
+ * leaf.
+ */
+ u32 features;
+
+ /*
+ * indicates whether pv emulation should be disabled if features
+ * are not present in the guest's cpuid
+ */
+ bool enforce;
+ } pv_cpuid;
};
struct kvm_lpage_info {
@@ -860,6 +876,13 @@ struct kvm_hv {
struct kvm_hv_syndbg hv_syndbg;
};
+struct msr_bitmap_range {
+ u32 flags;
+ u32 nmsrs;
+ u32 base;
+ unsigned long *bitmap;
+};
+
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
@@ -961,8 +984,31 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+ /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
+ u32 user_space_msr_mask;
+
+ struct {
+ u8 count;
+ bool default_allow:1;
+ struct msr_bitmap_range ranges[16];
+ } msr_filter;
+
struct kvm_pmu_event_filter *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
+
+ /*
+ * Whether the TDP MMU is enabled for this VM. This contains a
+ * snapshot of the TDP MMU module parameter from when the VM was
+ * created and remains unchanged for the life of the VM. If this is
+ * true, TDP MMU handler functions will run for various MMU
+ * operations.
+ */
+ bool tdp_mmu_enabled;
+
+ /* List of struct tdp_mmu_pages being used as roots */
+ struct list_head tdp_mmu_roots;
+ /* List of struct tdp_mmu_pages not being used as roots */
+ struct list_head tdp_mmu_pages;
};
struct kvm_vm_stat {
@@ -1069,7 +1115,7 @@ struct kvm_x86_ops {
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
- void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+ int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
@@ -1143,7 +1189,12 @@ struct kvm_x86_ops {
/* Returns actual tsc_offset set in active VMCS */
u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
- void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+ /*
+ * Retrieve somewhat arbitrary exit information. Intended to be used
+ * only from within tracepoints to avoid VMREADs when tracing is off.
+ */
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+ u32 *exit_int_info, u32 *exit_int_info_err_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
@@ -1221,12 +1272,13 @@ struct kvm_x86_ops {
int (*get_msr_feature)(struct kvm_msr_entry *entry);
- bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
void (*migrate_timers)(struct kvm_vcpu *vcpu);
+ void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_nested_ops {
@@ -1238,7 +1290,7 @@ struct kvm_x86_nested_ops {
int (*set_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
- bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+ bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int (*enable_evmcs)(struct kvm_vcpu *vcpu,
@@ -1612,8 +1664,8 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-void kvm_define_shared_msr(unsigned index, u32 msr);
-int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cf503824529c..a0f147893a04 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -136,9 +136,24 @@
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
+
+/*
+ * Indicates an MCE which has happened in kernel space but from
+ * which the kernel can recover simply by executing fixup_exception()
+ * so that an error is returned to the caller of the function that
+ * hit the machine check.
+ */
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
/*
+ * Indicates an MCE that happened in kernel space while copying data
+ * from user. In this case fixup_exception() gets the kernel to the
+ * error exit for the copy function. Machine check handler can then
+ * treat it like a fault taken in user mode.
+ */
+#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
+
+/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
* debugging tools. Each entry is only valid when its finished flag
@@ -174,6 +189,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
extern int mce_p5_enabled;
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+extern void enable_copy_mc_fragile(void);
+unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
+#else
+static inline void enable_copy_mc_fragile(void)
+{
+}
+#endif
+
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
@@ -200,12 +224,8 @@ void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct device *, mce_device);
-/*
- * Maximum banks number.
- * This is the limit of the current register layout on
- * Intel CPUs.
- */
-#define MAX_NR_BANKS 32
+/* Maximum number of MCA banks per CPU. */
+#define MAX_NR_BANKS 64
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
@@ -328,7 +348,6 @@ enum smca_bank_types {
struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
- u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
u8 count; /* Number of instances. */
};
diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index eb59804b6201..000000000000
--- a/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_MCSAFE_TEST
-extern unsigned long mcsafe_test_src;
-extern unsigned long mcsafe_test_dst;
-
-static inline void mcsafe_inject_src(void *addr)
-{
- if (addr)
- mcsafe_test_src = (unsigned long) addr;
- else
- mcsafe_test_src = ~0UL;
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
- if (addr)
- mcsafe_test_dst = (unsigned long) addr;
- else
- mcsafe_test_dst = ~0UL;
-}
-#else /* CONFIG_MCSAFE_TEST */
-static inline void mcsafe_inject_src(void *addr)
-{
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_MCSAFE_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_MCSAFE_TEST
-.macro MCSAFE_TEST_CTL
- .pushsection .data
- .align 8
- .globl mcsafe_test_src
- mcsafe_test_src:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_src)
- .globl mcsafe_test_dst
- mcsafe_test_dst:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_dst)
- .popsection
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_src, %r9
- ja \target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_dst, %r9
- ja \target
-.endm
-#else
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* CONFIG_MCSAFE_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 5049f6c22683..2f62bbdd9d12 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -19,6 +19,7 @@
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern u64 sme_me_mask;
+extern u64 sev_status;
extern bool sev_enabled;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
@@ -48,10 +49,12 @@ void __init mem_encrypt_free_decrypted_mem(void);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
+void __init sev_es_init_vc_handling(void);
bool sme_active(void);
bool sev_active(void);
+bool sev_es_active(void);
-#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+#define __bss_decrypted __section(".bss..decrypted")
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -70,8 +73,10 @@ static inline void __init sme_early_init(void) { }
static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }
+static inline void sev_es_init_vc_handling(void) { }
static inline bool sme_active(void) { return false; }
static inline bool sev_active(void) { return false; }
+static inline bool sev_es_active(void) { return false; }
static inline int __init
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 606cbaebd336..e90ac7e9ae2c 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -67,21 +67,11 @@ static inline void find_smp_config(void)
#ifdef CONFIG_X86_MPPARSE
extern void e820__memblock_alloc_reserved_mpc_new(void);
extern int enable_update_mptable;
-extern int default_mpc_apic_id(struct mpc_cpu *m);
-extern void default_smp_read_mpc_oem(struct mpc_table *mpc);
-# ifdef CONFIG_X86_IO_APIC
-extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str);
-# else
-# define default_mpc_oem_bus_info NULL
-# endif
extern void default_find_smp_config(void);
extern void default_get_smp_config(unsigned int early);
#else
static inline void e820__memblock_alloc_reserved_mpc_new(void) { }
#define enable_update_mptable 0
-#define default_mpc_apic_id NULL
-#define default_smp_read_mpc_oem NULL
-#define default_mpc_oem_bus_info NULL
#define default_find_smp_config x86_init_noop
#define default_get_smp_config x86_init_uint_noop
#endif
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 4f77b8f22e54..ffc289992d1b 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -54,6 +54,7 @@ typedef int (*hyperv_fill_flush_list_func)(
#define hv_enable_vdso_clocksource() \
vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
#define hv_get_raw_timer() rdtsc_ordered()
+#define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR
/*
* Reference to pv_ops must be inline so objtool
diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index 25ddd0916bb2..cd30013d15d3 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -9,6 +9,4 @@ typedef struct irq_alloc_info msi_alloc_info_t;
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
msi_alloc_info_t *arg);
-void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc);
-
#endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2859ee4f39a8..972a34d93505 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -257,6 +257,9 @@
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de
+#define MSR_IA32_PASID 0x00000d93
+#define MSR_IA32_PASID_VALID BIT_ULL(31)
+
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
@@ -464,11 +467,15 @@
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
@@ -857,11 +864,14 @@
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+#define MSR_PERF_METRICS 0x00000329
+
/* PERF_GLOBAL_OVF_CTL bits */
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 86f20d520a07..0b4920a7238e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -60,22 +60,20 @@ struct saved_msrs {
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
-#ifdef CONFIG_TRACEPOINTS
/*
* Be very careful with includes. This header is prone to include loops.
*/
#include <asm/atomic.h>
#include <linux/tracepoint-defs.h>
-extern struct tracepoint __tracepoint_read_msr;
-extern struct tracepoint __tracepoint_write_msr;
-extern struct tracepoint __tracepoint_rdpmc;
-#define msr_tracepoint_active(t) static_key_false(&(t).key)
+#ifdef CONFIG_TRACEPOINTS
+DECLARE_TRACEPOINT(read_msr);
+DECLARE_TRACEPOINT(write_msr);
+DECLARE_TRACEPOINT(rdpmc);
extern void do_trace_write_msr(unsigned int msr, u64 val, int failed);
extern void do_trace_read_msr(unsigned int msr, u64 val, int failed);
extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed);
#else
-#define msr_tracepoint_active(t) false
static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {}
static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {}
static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
@@ -128,7 +126,7 @@ static inline unsigned long long native_read_msr(unsigned int msr)
val = __rdmsr(msr);
- if (msr_tracepoint_active(__tracepoint_read_msr))
+ if (tracepoint_enabled(read_msr))
do_trace_read_msr(msr, val, 0);
return val;
@@ -150,7 +148,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
_ASM_EXTABLE(2b, 3b)
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
: "c" (msr), [fault] "i" (-EIO));
- if (msr_tracepoint_active(__tracepoint_read_msr))
+ if (tracepoint_enabled(read_msr))
do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err);
return EAX_EDX_VAL(val, low, high);
}
@@ -161,7 +159,7 @@ native_write_msr(unsigned int msr, u32 low, u32 high)
{
__wrmsr(msr, low, high);
- if (msr_tracepoint_active(__tracepoint_write_msr))
+ if (tracepoint_enabled(write_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
@@ -181,7 +179,7 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high)
: "c" (msr), "0" (low), "d" (high),
[fault] "i" (-EIO)
: "memory");
- if (msr_tracepoint_active(__tracepoint_write_msr))
+ if (tracepoint_enabled(write_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), err);
return err;
}
@@ -248,7 +246,7 @@ static inline unsigned long long native_read_pmc(int counter)
DECLARE_ARGS(val, low, high);
asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
- if (msr_tracepoint_active(__tracepoint_rdpmc))
+ if (tracepoint_enabled(rdpmc))
do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index e7752b4038ff..cb9ad6b73973 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -4,7 +4,7 @@
#define _ASM_X86_NOSPEC_BRANCH_H_
#include <linux/static_key.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
#include <asm/alternative.h>
#include <asm/alternative-asm.h>
@@ -314,19 +314,19 @@ static inline void mds_idle_clear_cpu_buffers(void)
* lfence
* jmp spec_trap
* do_rop:
- * mov %rax,(%rsp) for x86_64
+ * mov %rcx,(%rsp) for x86_64
* mov %edx,(%esp) for x86_32
* retq
*
* Without retpolines configured:
*
- * jmp *%rax for x86_64
+ * jmp *%rcx for x86_64
* jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 17
-# define RETPOLINE_RAX_BPF_JIT() \
+# define RETPOLINE_RCX_BPF_JIT_SIZE 17
+# define RETPOLINE_RCX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
@@ -334,7 +334,7 @@ do { \
EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
- EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
EMIT1(0xC3); /* retq */ \
} while (0)
# else /* !CONFIG_X86_64 */
@@ -352,9 +352,9 @@ do { \
# endif
#else /* !CONFIG_RETPOLINE */
# ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE 2
-# define RETPOLINE_RAX_BPF_JIT() \
- EMIT2(0xFF, 0xE0); /* jmp *%rax */
+# define RETPOLINE_RCX_BPF_JIT_SIZE 2
+# define RETPOLINE_RCX_BPF_JIT() \
+ EMIT2(0xFF, 0xE1); /* jmp *%rcx */
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index bbfde3d2662f..e3bae2b60a0d 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -3,6 +3,7 @@
#define _ASM_X86_NUMA_H
#include <linux/nodemask.h>
+#include <linux/errno.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
@@ -62,12 +63,14 @@ extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
extern void numa_add_cpu(int cpu);
extern void numa_remove_cpu(int cpu);
+extern void init_gi_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void init_cpu_to_node(void) { }
static inline void numa_add_cpu(int cpu) { }
static inline void numa_remove_cpu(int cpu) { }
+static inline void init_gi_nodes(void) { }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
@@ -77,7 +80,12 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
-void numa_emu_cmdline(char *);
+int numa_emu_cmdline(char *str);
+#else /* CONFIG_NUMA_EMU */
+static inline int numa_emu_cmdline(char *str)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_NUMA_EMU */
#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index d25534940bde..fdbffec4cfde 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -39,27 +39,6 @@
#define ORC_REG_SP_INDIRECT 9
#define ORC_REG_MAX 15
-/*
- * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
- * caller's SP right before it made the call). Used for all callable
- * functions, i.e. all C code and all callable asm functions.
- *
- * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
- * to a fully populated pt_regs from a syscall, interrupt, or exception.
- *
- * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
- * points to the iret return frame.
- *
- * The UNWIND_HINT macros are used only for the unwind_hint struct. They
- * aren't used in struct orc_entry due to size and complexity constraints.
- * Objtool converts them to real types when it converts the hints to orc
- * entries.
- */
-#define ORC_TYPE_CALL 0
-#define ORC_TYPE_REGS 1
-#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_RET_OFFSET 3
-
#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
@@ -78,19 +57,6 @@ struct orc_entry {
unsigned end:1;
} __packed;
-/*
- * This struct is used by asm and inline asm code to manually annotate the
- * location of registers on the stack for the ORC unwinder.
- *
- * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
- */
-struct unwind_hint {
- u32 ip;
- s16 sp_offset;
- u8 sp_reg;
- u8 type;
- u8 end;
-};
#endif /* __ASSEMBLY__ */
#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 565ad755c785..f462895a33e4 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -42,6 +42,17 @@
#endif /* CONFIG_X86_PAE */
/*
+ * User space process size: 3GB (default).
+ */
+#define IA32_PAGE_OFFSET __PAGE_OFFSET
+#define TASK_SIZE __PAGE_OFFSET
+#define TASK_SIZE_LOW TASK_SIZE
+#define TASK_SIZE_MAX TASK_SIZE
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#define STACK_TOP TASK_SIZE
+#define STACK_TOP_MAX STACK_TOP
+
+/*
* Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
*/
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 288b065955b7..3f49dac03617 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -28,6 +28,7 @@
#define IST_INDEX_NMI 1
#define IST_INDEX_DB 2
#define IST_INDEX_MCE 3
+#define IST_INDEX_VC 4
/*
* Set __PAGE_OFFSET to the most negative possible address +
@@ -59,6 +60,44 @@
#endif
/*
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+#define TASK_SIZE_MAX ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
+
+#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
+ 0xc0000000 : 0xFFFFe000)
+
+#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
+ IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
+#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+
+#define STACK_TOP TASK_SIZE_LOW
+#define STACK_TOP_MAX TASK_SIZE_MAX
+
+/*
* Maximum kernel image size is limited to 1 GiB, due to the fixmap living
* in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
*
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 3d2afecde50c..d25cc6830e89 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -160,8 +160,6 @@ static inline void wbinvd(void)
PVOP_VCALL0(cpu.wbinvd);
}
-#define get_kernel_rpl() (pv_info.kernel_rpl)
-
static inline u64 paravirt_read_msr(unsigned msr)
{
return PVOP_CALL1(u64, cpu.read_msr, msr);
@@ -277,12 +275,10 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu)
PVOP_VCALL2(cpu.load_tls, t, cpu);
}
-#ifdef CONFIG_X86_64
static inline void load_gs_index(unsigned int gs)
{
PVOP_VCALL1(cpu.load_gs_index, gs);
}
-#endif
static inline void write_ldt_entry(struct desc_struct *dt, int entry,
const void *desc)
@@ -375,52 +371,22 @@ static inline void paravirt_release_p4d(unsigned long pfn)
static inline pte_t __pte(pteval_t val)
{
- pteval_t ret;
-
- if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pteval_t, mmu.make_pte, val, (u64)val >> 32);
- else
- ret = PVOP_CALLEE1(pteval_t, mmu.make_pte, val);
-
- return (pte_t) { .pte = ret };
+ return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
}
static inline pteval_t pte_val(pte_t pte)
{
- pteval_t ret;
-
- if (sizeof(pteval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pteval_t, mmu.pte_val,
- pte.pte, (u64)pte.pte >> 32);
- else
- ret = PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
-
- return ret;
+ return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
}
static inline pgd_t __pgd(pgdval_t val)
{
- pgdval_t ret;
-
- if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pgdval_t, mmu.make_pgd, val, (u64)val >> 32);
- else
- ret = PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val);
-
- return (pgd_t) { ret };
+ return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
}
static inline pgdval_t pgd_val(pgd_t pgd)
{
- pgdval_t ret;
-
- if (sizeof(pgdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pgdval_t, mmu.pgd_val,
- pgd.pgd, (u64)pgd.pgd >> 32);
- else
- ret = PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
-
- return ret;
+ return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -438,78 +404,34 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned
pte_t *ptep, pte_t old_pte, pte_t pte)
{
- if (sizeof(pteval_t) > sizeof(long))
- /* 5 arg words */
- pv_ops.mmu.ptep_modify_prot_commit(vma, addr, ptep, pte);
- else
- PVOP_VCALL4(mmu.ptep_modify_prot_commit,
- vma, addr, ptep, pte.pte);
+ PVOP_VCALL4(mmu.ptep_modify_prot_commit, vma, addr, ptep, pte.pte);
}
static inline void set_pte(pte_t *ptep, pte_t pte)
{
- if (sizeof(pteval_t) > sizeof(long))
- PVOP_VCALL3(mmu.set_pte, ptep, pte.pte, (u64)pte.pte >> 32);
- else
- PVOP_VCALL2(mmu.set_pte, ptep, pte.pte);
-}
-
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
-{
- if (sizeof(pteval_t) > sizeof(long))
- /* 5 arg words */
- pv_ops.mmu.set_pte_at(mm, addr, ptep, pte);
- else
- PVOP_VCALL4(mmu.set_pte_at, mm, addr, ptep, pte.pte);
+ PVOP_VCALL2(mmu.set_pte, ptep, pte.pte);
}
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- pmdval_t val = native_pmd_val(pmd);
-
- if (sizeof(pmdval_t) > sizeof(long))
- PVOP_VCALL3(mmu.set_pmd, pmdp, val, (u64)val >> 32);
- else
- PVOP_VCALL2(mmu.set_pmd, pmdp, val);
+ PVOP_VCALL2(mmu.set_pmd, pmdp, native_pmd_val(pmd));
}
-#if CONFIG_PGTABLE_LEVELS >= 3
static inline pmd_t __pmd(pmdval_t val)
{
- pmdval_t ret;
-
- if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pmdval_t, mmu.make_pmd, val, (u64)val >> 32);
- else
- ret = PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val);
-
- return (pmd_t) { ret };
+ return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) };
}
static inline pmdval_t pmd_val(pmd_t pmd)
{
- pmdval_t ret;
-
- if (sizeof(pmdval_t) > sizeof(long))
- ret = PVOP_CALLEE2(pmdval_t, mmu.pmd_val,
- pmd.pmd, (u64)pmd.pmd >> 32);
- else
- ret = PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd);
-
- return ret;
+ return PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd);
}
static inline void set_pud(pud_t *pudp, pud_t pud)
{
- pudval_t val = native_pud_val(pud);
-
- if (sizeof(pudval_t) > sizeof(long))
- PVOP_VCALL3(mmu.set_pud, pudp, val, (u64)val >> 32);
- else
- PVOP_VCALL2(mmu.set_pud, pudp, val);
+ PVOP_VCALL2(mmu.set_pud, pudp, native_pud_val(pud));
}
-#if CONFIG_PGTABLE_LEVELS >= 4
+
static inline pud_t __pud(pudval_t val)
{
pudval_t ret;
@@ -526,7 +448,7 @@ static inline pudval_t pud_val(pud_t pud)
static inline void pud_clear(pud_t *pudp)
{
- set_pud(pudp, __pud(0));
+ set_pud(pudp, native_make_pud(0));
}
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
@@ -563,40 +485,17 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
} while (0)
#define pgd_clear(pgdp) do { \
- if (pgtable_l5_enabled()) \
- set_pgd(pgdp, __pgd(0)); \
+ if (pgtable_l5_enabled()) \
+ set_pgd(pgdp, native_make_pgd(0)); \
} while (0)
#endif /* CONFIG_PGTABLE_LEVELS == 5 */
static inline void p4d_clear(p4d_t *p4dp)
{
- set_p4d(p4dp, __p4d(0));
+ set_p4d(p4dp, native_make_p4d(0));
}
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
-
-#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
-
-#ifdef CONFIG_X86_PAE
-/* Special-case pte-setting operations for PAE, which can't update a
- 64-bit pte atomically */
-static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
-{
- PVOP_VCALL3(mmu.set_pte_atomic, ptep, pte.pte, pte.pte >> 32);
-}
-
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- PVOP_VCALL3(mmu.pte_clear, mm, addr, ptep);
-}
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
- PVOP_VCALL1(mmu.pmd_clear, pmdp);
-}
-#else /* !CONFIG_X86_PAE */
static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
{
set_pte(ptep, pte);
@@ -605,14 +504,13 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- set_pte_at(mm, addr, ptep, __pte(0));
+ set_pte(ptep, native_make_pte(0));
}
static inline void pmd_clear(pmd_t *pmdp)
{
- set_pmd(pmdp, __pmd(0));
+ set_pmd(pmdp, native_make_pmd(0));
}
-#endif /* CONFIG_X86_PAE */
#define __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
@@ -682,16 +580,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
-#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
-#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
-
/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;"
#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;"
-
-#define PV_FLAGS_ARG "0"
-#define PV_EXTRA_CLOBBERS
-#define PV_VEXTRA_CLOBBERS
#else
/* save and restore all caller-save registers, except return value */
#define PV_SAVE_ALL_CALLER_REGS \
@@ -712,14 +603,6 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
"pop %rsi;" \
"pop %rdx;" \
"pop %rcx;"
-
-/* We save some registers, but all of them, that's too much. We clobber all
- * caller saved registers but the argument parameter */
-#define PV_SAVE_REGS "pushq %%rdi;"
-#define PV_RESTORE_REGS "popq %%rdi;"
-#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi"
-#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi"
-#define PV_FLAGS_ARG "D"
#endif
/*
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8dfcb2508e6d..0fad9f61c76a 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -68,13 +68,8 @@ struct paravirt_callee_save {
/* general info */
struct pv_info {
#ifdef CONFIG_PARAVIRT_XXL
- unsigned int kernel_rpl;
- int shared_kernel_pmd;
-
-#ifdef CONFIG_X86_64
u16 extra_user_64bit_cs; /* __USER_CS if none */
#endif
-#endif
const char *name;
};
@@ -126,9 +121,7 @@ struct pv_cpu_ops {
void (*set_ldt)(const void *desc, unsigned entries);
unsigned long (*store_tr)(void);
void (*load_tls)(struct thread_struct *t, unsigned int cpu);
-#ifdef CONFIG_X86_64
void (*load_gs_index)(unsigned int idx);
-#endif
void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
const void *desc);
void (*write_gdt_entry)(struct desc_struct *,
@@ -249,8 +242,6 @@ struct pv_mmu_ops {
/* Pagetable manipulation functions */
void (*set_pte)(pte_t *ptep, pte_t pteval);
- void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval);
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
pte_t (*ptep_modify_prot_start)(struct vm_area_struct *vma, unsigned long addr,
@@ -264,21 +255,11 @@ struct pv_mmu_ops {
struct paravirt_callee_save pgd_val;
struct paravirt_callee_save make_pgd;
-#if CONFIG_PGTABLE_LEVELS >= 3
-#ifdef CONFIG_X86_PAE
- void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
- void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
- void (*pmd_clear)(pmd_t *pmdp);
-
-#endif /* CONFIG_X86_PAE */
-
void (*set_pud)(pud_t *pudp, pud_t pudval);
struct paravirt_callee_save pmd_val;
struct paravirt_callee_save make_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
struct paravirt_callee_save pud_val;
struct paravirt_callee_save make_pud;
@@ -291,10 +272,6 @@ struct pv_mmu_ops {
void (*set_pgd)(pgd_t *pgdp, pgd_t pgdval);
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
-#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
-
-#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
-
struct pv_lazy_ops lazy_mode;
/* dom0 ops */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 7ccb338507e3..d2c76c8d8cfd 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -105,17 +105,6 @@ static inline void early_quirks(void) { }
extern void pci_iommu_alloc(void);
-#ifdef CONFIG_PCI_MSI
-/* implemented in arch/x86/kernel/apic/io_apic. */
-struct msi_desc;
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
-void native_teardown_msi_irq(unsigned int irq);
-void native_restore_msi_irqs(struct pci_dev *dev);
-#else
-#define native_setup_msi_irqs NULL
-#define native_teardown_msi_irq NULL
-#endif
-
/* generic pci stuff */
#include <asm-generic/pci.h>
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 73bb404f4d2a..490411dba438 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -114,9 +114,20 @@ extern const struct pci_raw_ops pci_direct_conf1;
extern bool port_cf9_safe;
/* arch_initcall level */
+#ifdef CONFIG_PCI_DIRECT
extern int pci_direct_probe(void);
extern void pci_direct_init(int type);
+#else
+static inline int pci_direct_probe(void) { return -1; }
+static inline void pci_direct_init(int type) { }
+#endif
+
+#ifdef CONFIG_PCI_BIOS
extern void pci_pcbios_init(void);
+#else
+static inline void pci_pcbios_init(void) { }
+#endif
+
extern void __init dmi_check_pciprobe(void);
extern void __init dmi_check_skip_isa_align(void);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 0c1b13720525..6960cd6d1f23 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -196,13 +196,29 @@ struct x86_pmu_capability {
* Fixed-purpose performance events:
*/
+/* RDPMC offset for Fixed PMCs */
+#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30)
+#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29)
+
/*
- * All 3 fixed-mode PMCs are configured via this single MSR:
+ * All the fixed-mode PMCs are configured via this single MSR:
*/
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
- * The counts are available in three separate MSRs:
+ * There is no event-code assigned to the fixed-mode PMCs.
+ *
+ * For a fixed-mode PMC, which has an equivalent event on a general-purpose
+ * PMC, the event-code of the equivalent event is used for the fixed-mode PMC,
+ * e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core.
+ *
+ * For a fixed-mode PMC, which doesn't have an equivalent event, a
+ * pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS.
+ * The pseudo event-code for a fixed-mode PMC must be 0x00.
+ * The pseudo umask-code is 0xX. The X equals the index of the fixed
+ * counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300.
+ *
+ * The counts are available in separate MSRs:
*/
/* Instr_Retired.Any: */
@@ -213,29 +229,84 @@ struct x86_pmu_capability {
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
-/* CPU_CLK_Unhalted.Ref: */
+/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
+/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */
+#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c
+#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
+#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
+
/*
* We model BTS tracing as another fixed-mode PMC.
*
- * We choose a value in the middle of the fixed event range, since lower
+ * We choose the value 47 for the fixed index of BTS, since lower
* values are used by actual fixed events and higher values are used
* to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
*/
-#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15)
+
+/*
+ * The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for
+ * each TopDown metric event.
+ *
+ * Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS).
+ */
+#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0)
+#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
+#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2)
+#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3)
+#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND
+#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
+ INTEL_PMC_MSK_FIXED_SLOTS)
-#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
-#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62)
-#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
-#define GLOBAL_STATUS_ASIF BIT_ULL(60)
-#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
-#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
-#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
-#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
+/*
+ * There is no event-code assigned to the TopDown events.
+ *
+ * For the slots event, use the pseudo code of the fixed counter 3.
+ *
+ * For the metric events, the pseudo event-code is 0x00.
+ * The pseudo umask-code starts from the middle of the pseudo event
+ * space, 0x80.
+ */
+#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */
+/* Level 1 metrics */
+#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */
+#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */
+#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */
+#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */
+#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND
+#define INTEL_TD_METRIC_NUM 4
+
+static inline bool is_metric_idx(int idx)
+{
+ return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
+}
+
+static inline bool is_topdown_idx(int idx)
+{
+ return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS;
+}
+#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \
+ (~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN)
+
+#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
+#define GLOBAL_STATUS_BUFFER_OVF_BIT 62
+#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
+#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
+#define GLOBAL_STATUS_ASIF BIT_ULL(60)
+#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
+#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
+#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
+#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55
+#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
+#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
+
+#define GLOBAL_CTRL_EN_PERF_METRICS 48
/*
* We model guest LBR event tracing as another fixed-mode PMC like BTS.
*
@@ -334,6 +405,7 @@ struct pebs_xmm {
#define IBS_OP_ENABLE (1ULL<<17)
#define IBS_OP_MAX_CNT 0x0000FFFFULL
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
+#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
#define IBS_RIP_INVALID (1ULL<<38)
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 80fbb4a9ed87..56baf43befb4 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -20,12 +20,7 @@ typedef union {
} pte_t;
#endif /* !__ASSEMBLY__ */
-#ifdef CONFIG_PARAVIRT_XXL
-#define SHARED_KERNEL_PMD ((!static_cpu_has(X86_FEATURE_PTI) && \
- (pv_info.shared_kernel_pmd)))
-#else
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
-#endif
#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b836138ce852..a02c67291cfc 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,7 +28,7 @@
#include <asm-generic/pgtable_uffd.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
-int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
@@ -63,7 +63,6 @@ extern pmdval_t early_pmd_flags;
#include <asm/paravirt.h>
#else /* !CONFIG_PARAVIRT_XXL */
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
-#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
@@ -1033,10 +1032,10 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
return res;
}
-static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep , pte_t pte)
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
- native_set_pte(ptep, pte);
+ set_pte(ptep, pte);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 8f63efb2a2cc..52e5f5f2240d 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -159,6 +159,4 @@ extern unsigned int ptrs_per_p4d;
#define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t))
-#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED)
-
#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 97143d87994c..82a08b585818 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -482,10 +482,6 @@ extern unsigned int fpu_user_xstate_size;
struct perf_event;
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -517,7 +513,7 @@ struct thread_struct {
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
- unsigned long debugreg6;
+ unsigned long virtual_dr6;
/* Keep track of the exact dr7 value set by the user */
unsigned long ptrace_dr7;
/* Fault info: */
@@ -538,8 +534,6 @@ struct thread_struct {
*/
unsigned long iopl_emul;
- mm_segment_t addr_limit;
-
unsigned int sig_on_uaccess_err:1;
/* Floating point and extended processor state */
@@ -696,6 +690,7 @@ extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
+extern void cpu_init_exception_handling(void);
extern void cr4_init(void);
static inline unsigned long get_debugctlmsr(void)
@@ -782,67 +777,15 @@ static inline void spin_lock_prefetch(const void *x)
})
#ifdef CONFIG_X86_32
-/*
- * User space process size: 3GB (default).
- */
-#define IA32_PAGE_OFFSET PAGE_OFFSET
-#define TASK_SIZE PAGE_OFFSET
-#define TASK_SIZE_LOW TASK_SIZE
-#define TASK_SIZE_MAX TASK_SIZE
-#define DEFAULT_MAP_WINDOW TASK_SIZE
-#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX STACK_TOP
-
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
- .addr_limit = KERNEL_DS, \
}
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
-#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
- 0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-
-#define STACK_TOP TASK_SIZE_LOW
-#define STACK_TOP_MAX TASK_SIZE_MAX
-
-#define INIT_THREAD { \
- .addr_limit = KERNEL_DS, \
-}
+#define INIT_THREAD { }
extern unsigned long KSTK_ESP(struct task_struct *task);
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 28996fe19301..2c35f1c01a2d 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -10,6 +10,7 @@ void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
+void entry_SYSCALL_64_safe_stack(void);
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index b35030eeec36..5db5d083c873 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -21,6 +21,9 @@ struct real_mode_header {
/* SMP trampoline */
u32 trampoline_start;
u32 trampoline_header;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ u32 sev_es_trampoline_start;
+#endif
#ifdef CONFIG_X86_64
u32 trampoline_pgd;
#endif
@@ -57,6 +60,9 @@ extern unsigned char real_mode_blob_end[];
extern unsigned long initial_code;
extern unsigned long initial_gs;
extern unsigned long initial_stack;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern unsigned long initial_vc_handler;
+#endif
extern unsigned char real_mode_blob[];
extern unsigned char real_mode_relocs[];
@@ -66,6 +72,7 @@ extern unsigned char startup_32_smp[];
extern unsigned char boot_gdt[];
#else
extern unsigned char secondary_startup_64[];
+extern unsigned char secondary_startup_64_no_verify[];
#endif
static inline size_t real_mode_size_needed(void)
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6847d85400a8..3ff0d48469f2 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -54,7 +54,7 @@
#endif
#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE 0
#define NEED_PGE 0
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 9646c300f128..7fdd4facfce7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -222,15 +222,11 @@
#endif
-#ifndef CONFIG_PARAVIRT_XXL
-# define get_kernel_rpl() 0
-#endif
-
#define IDT_ENTRIES 256
#define NUM_EXCEPTION_VECTORS 32
/* Bitmask of exception vectors which push an error code on the stack: */
-#define EXCEPTION_ERRCODE_MASK 0x00027d00
+#define EXCEPTION_ERRCODE_MASK 0x20027d00
#define GDT_SIZE (GDT_ENTRIES*8)
#define GDT_ENTRY_TLS_ENTRIES 3
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 84b645cc8bc9..389d851a02c4 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -39,6 +39,8 @@ void vsmp_init(void);
static inline void vsmp_init(void) { }
#endif
+struct pt_regs;
+
void setup_bios_corruption_check(void);
void early_platform_quirks(void);
@@ -48,7 +50,9 @@ extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
extern unsigned long __startup_secondary_64(void);
-extern int early_make_pgtable(unsigned long address);
+extern void startup_64_setup_env(unsigned long physbase);
+extern void early_setup_idt(void);
+extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
#ifdef CONFIG_X86_INTEL_MID
extern void x86_intel_mid_early_setup(void);
@@ -115,7 +119,7 @@ void *extend_brk(size_t size, size_t align);
* executable.)
*/
#define RESERVE_BRK(name,sz) \
- static void __section(.discard.text) __used notrace \
+ static void __section(".discard.text") __used notrace \
__brk_reservation_fn_##name##__(void) { \
asm volatile ( \
".pushsection .brk_reservation,\"aw\",@nobits;" \
diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h
new file mode 100644
index 000000000000..cf1d957c7091
--- /dev/null
+++ b/arch/x86/include/asm/sev-es.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#ifndef __ASM_ENCRYPTED_STATE_H
+#define __ASM_ENCRYPTED_STATE_H
+
+#include <linux/types.h>
+#include <asm/insn.h>
+
+#define GHCB_SEV_INFO 0x001UL
+#define GHCB_SEV_INFO_REQ 0x002UL
+#define GHCB_INFO(v) ((v) & 0xfffUL)
+#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL)
+#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL)
+#define GHCB_PROTO_OUR 0x0001UL
+#define GHCB_SEV_CPUID_REQ 0x004UL
+#define GHCB_CPUID_REQ_EAX 0
+#define GHCB_CPUID_REQ_EBX 1
+#define GHCB_CPUID_REQ_ECX 2
+#define GHCB_CPUID_REQ_EDX 3
+#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \
+ (((unsigned long)reg & 3) << 30) | \
+ (((unsigned long)fn) << 32))
+
+#define GHCB_PROTOCOL_MAX 0x0001UL
+#define GHCB_DEFAULT_USAGE 0x0000UL
+
+#define GHCB_SEV_CPUID_RESP 0x005UL
+#define GHCB_SEV_TERMINATE 0x100UL
+#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \
+ (((((u64)reason_set) & 0x7) << 12) | \
+ ((((u64)reason_val) & 0xff) << 16))
+#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0
+#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1
+
+#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff)
+#define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); }
+
+enum es_result {
+ ES_OK, /* All good */
+ ES_UNSUPPORTED, /* Requested operation not supported */
+ ES_VMM_ERROR, /* Unexpected state from the VMM */
+ ES_DECODE_FAILED, /* Instruction decoding failed */
+ ES_EXCEPTION, /* Instruction caused exception */
+ ES_RETRY, /* Retry instruction emulation */
+};
+
+struct es_fault_info {
+ unsigned long vector;
+ unsigned long error_code;
+ unsigned long cr2;
+};
+
+struct pt_regs;
+
+/* ES instruction emulation context */
+struct es_em_ctxt {
+ struct pt_regs *regs;
+ struct insn insn;
+ struct es_fault_info fi;
+};
+
+void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
+
+static inline u64 lower_bits(u64 val, unsigned int bits)
+{
+ u64 mask = (1ULL << bits) - 1;
+
+ return (val & mask);
+}
+
+struct real_mode_header;
+enum stack_type;
+
+/* Early IDT entry points for #VC handler */
+extern void vc_no_ghcb(void);
+extern void vc_boot_ghcb(void);
+extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern struct static_key_false sev_es_enable_key;
+extern void __sev_es_ist_enter(struct pt_regs *regs);
+extern void __sev_es_ist_exit(void);
+static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
+{
+ if (static_branch_unlikely(&sev_es_enable_key))
+ __sev_es_ist_enter(regs);
+}
+static __always_inline void sev_es_ist_exit(void)
+{
+ if (static_branch_unlikely(&sev_es_enable_key))
+ __sev_es_ist_exit();
+}
+extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
+extern void __sev_es_nmi_complete(void);
+static __always_inline void sev_es_nmi_complete(void)
+{
+ if (static_branch_unlikely(&sev_es_enable_key))
+ __sev_es_nmi_complete();
+}
+extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+#else
+static inline void sev_es_ist_enter(struct pt_regs *regs) { }
+static inline void sev_es_ist_exit(void) { }
+static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
+static inline void sev_es_nmi_complete(void) { }
+static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 59a3e13204c3..cc177b4431ae 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -11,45 +11,47 @@
#include <linux/jump_label.h>
/*
- * Volatile isn't enough to prevent the compiler from reordering the
- * read/write functions for the control registers and messing everything up.
- * A memory clobber would solve the problem, but would prevent reordering of
- * all loads stores around it, which can hurt performance. Solution is to
- * use a variable and mimic reads and writes to it to enforce serialization
+ * The compiler should not reorder volatile asm statements with respect to each
+ * other: they should execute in program order. However GCC 4.9.x and 5.x have
+ * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder
+ * volatile asm. The write functions are not affected since they have memory
+ * clobbers preventing reordering. To prevent reads from being reordered with
+ * respect to writes, use a dummy memory operand.
*/
-extern unsigned long __force_order;
+
+#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL)
void native_write_cr0(unsigned long val);
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
- asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static __always_inline unsigned long native_read_cr2(void)
{
unsigned long val;
- asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static __always_inline void native_write_cr2(unsigned long val)
{
- asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
+ asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}
static inline unsigned long __native_read_cr3(void)
{
unsigned long val;
- asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
static inline void native_write_cr3(unsigned long val)
{
- asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
+ asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}
static inline unsigned long native_read_cr4(void)
@@ -64,10 +66,10 @@ static inline unsigned long native_read_cr4(void)
asm volatile("1: mov %%cr4, %0\n"
"2:\n"
_ASM_EXTABLE(1b, 2b)
- : "=r" (val), "=m" (__force_order) : "0" (0));
+ : "=r" (val) : "0" (0), __FORCE_ORDER);
#else
/* CR4 always exists on x86_64. */
- asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER);
#endif
return val;
}
@@ -234,6 +236,76 @@ static inline void clwb(volatile void *__p)
#define nop() asm volatile ("nop")
+static inline void serialize(void)
+{
+ /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
+
+/* The dst parameter must be 64-bytes aligned */
+static inline void movdir64b(void *dst, const void *src)
+{
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } *__dst = dst;
+
+ /*
+ * MOVDIR64B %(rdx), rax.
+ *
+ * Both __src and __dst must be memory constraints in order to tell the
+ * compiler that no other memory accesses should be reordered around
+ * this one.
+ *
+ * Also, both must be supplied as lvalues because this tells
+ * the compiler what the object is (its size) the instruction accesses.
+ * I.e., not the pointers but what they point to, thus the deref'ing '*'.
+ */
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ : "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
+}
+
+/**
+ * enqcmds - Enqueue a command in supervisor (CPL0) mode
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: 512 bits memory operand
+ *
+ * The ENQCMDS instruction allows software to write a 512-bit command to
+ * a 512-bit-aligned special MMIO region that supports the instruction.
+ * A return status is loaded into the ZF flag in the RFLAGS register.
+ * ZF = 0 equates to success, and ZF = 1 indicates retry or error.
+ *
+ * This function issues the ENQCMDS instruction to submit data from
+ * kernel space to MMIO space, in a unit of 512 bits. Order of data access
+ * is not guaranteed, nor is a memory barrier performed afterwards. It
+ * returns 0 on success and -EAGAIN on failure.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the
+ * ENQCMDS instruction is supported on the platform and the device accepts
+ * ENQCMDS.
+ */
+static inline int enqcmds(void __iomem *dst, const void *src)
+{
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } *__dst = dst;
+ int zf;
+
+ /*
+ * ENQCMDS %(rdx), rax
+ *
+ * See movdir64b()'s comment on operand specification.
+ */
+ asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
+ CC_SET(z)
+ : CC_OUT(z) (zf), "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
+
+ /* Submission failure is indicated via EFLAGS.ZF=1 */
+ if (zf)
+ return -EAGAIN;
+
+ return 0;
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 5ae5a68e469d..49600643faba 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -35,6 +35,8 @@ bool in_entry_stack(unsigned long *stack, struct stack_info *info);
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask);
+bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info);
const char *stack_type_name(enum stack_type type);
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
new file mode 100644
index 000000000000..c37f11999d0c
--- /dev/null
+++ b/arch/x86/include/asm/static_call.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_STATIC_CALL_H
+#define _ASM_STATIC_CALL_H
+
+#include <asm/text-patching.h>
+
+/*
+ * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
+ * uses the current value of the key->func pointer to do an indirect jump to
+ * the function. This trampoline is only used during boot, before the call
+ * sites get patched by static_call_update(). The name of this trampoline has
+ * a magical aspect: objtool uses it to find static call sites so it can create
+ * the .static_call_sites section.
+ *
+ * For CONFIG_HAVE_STATIC_CALL, this is a permanent trampoline which
+ * does a direct jump to the function. The direct jump gets patched by
+ * static_call_update().
+ *
+ * Having the trampoline in a special section forces GCC to emit a JMP.d32 when
+ * it does tail-call optimization on the call; since you cannot compute the
+ * relative displacement across sections.
+ */
+
+#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ insns " \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")
+
+#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 75314c3dbe47..6e450827f677 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
#endif
-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
- size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst: destination address
- * @src: source address
- * @cnt: number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
- if (static_branch_unlikely(&mcsafe_key))
- return __memcpy_mcsafe(dst, src, cnt);
- else
-#endif
- memcpy(dst, src, cnt);
- return 0;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 8a1f5382a4ea..71d630bb5e08 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -3,10 +3,54 @@
#define __SVM_H
#include <uapi/asm/svm.h>
-
+#include <uapi/asm/kvm.h>
+
+/*
+ * 32-bit intercept words in the VMCB Control Area, starting
+ * at Byte offset 000h.
+ */
+
+enum intercept_words {
+ INTERCEPT_CR = 0,
+ INTERCEPT_DR,
+ INTERCEPT_EXCEPTION,
+ INTERCEPT_WORD3,
+ INTERCEPT_WORD4,
+ INTERCEPT_WORD5,
+ MAX_INTERCEPT,
+};
enum {
- INTERCEPT_INTR,
+ /* Byte offset 000h (word 0) */
+ INTERCEPT_CR0_READ = 0,
+ INTERCEPT_CR3_READ = 3,
+ INTERCEPT_CR4_READ = 4,
+ INTERCEPT_CR8_READ = 8,
+ INTERCEPT_CR0_WRITE = 16,
+ INTERCEPT_CR3_WRITE = 16 + 3,
+ INTERCEPT_CR4_WRITE = 16 + 4,
+ INTERCEPT_CR8_WRITE = 16 + 8,
+ /* Byte offset 004h (word 1) */
+ INTERCEPT_DR0_READ = 32,
+ INTERCEPT_DR1_READ,
+ INTERCEPT_DR2_READ,
+ INTERCEPT_DR3_READ,
+ INTERCEPT_DR4_READ,
+ INTERCEPT_DR5_READ,
+ INTERCEPT_DR6_READ,
+ INTERCEPT_DR7_READ,
+ INTERCEPT_DR0_WRITE = 48,
+ INTERCEPT_DR1_WRITE,
+ INTERCEPT_DR2_WRITE,
+ INTERCEPT_DR3_WRITE,
+ INTERCEPT_DR4_WRITE,
+ INTERCEPT_DR5_WRITE,
+ INTERCEPT_DR6_WRITE,
+ INTERCEPT_DR7_WRITE,
+ /* Byte offset 008h (word 2) */
+ INTERCEPT_EXCEPTION_OFFSET = 64,
+ /* Byte offset 00Ch (word 3) */
+ INTERCEPT_INTR = 96,
INTERCEPT_NMI,
INTERCEPT_SMI,
INTERCEPT_INIT,
@@ -38,7 +82,8 @@ enum {
INTERCEPT_TASK_SWITCH,
INTERCEPT_FERR_FREEZE,
INTERCEPT_SHUTDOWN,
- INTERCEPT_VMRUN,
+ /* Byte offset 010h (word 4) */
+ INTERCEPT_VMRUN = 128,
INTERCEPT_VMMCALL,
INTERCEPT_VMLOAD,
INTERCEPT_VMSAVE,
@@ -53,15 +98,18 @@ enum {
INTERCEPT_MWAIT_COND,
INTERCEPT_XSETBV,
INTERCEPT_RDPRU,
+ /* Byte offset 014h (word 5) */
+ INTERCEPT_INVLPGB = 160,
+ INTERCEPT_INVLPGB_ILLEGAL,
+ INTERCEPT_INVPCID,
+ INTERCEPT_MCOMMIT,
+ INTERCEPT_TLBSYNC,
};
struct __attribute__ ((__packed__)) vmcb_control_area {
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
- u8 reserved_1[40];
+ u32 intercepts[MAX_INTERCEPT];
+ u32 reserved_1[15 - MAX_INTERCEPT];
u16 pause_filter_thresh;
u16 pause_filter_count;
u64 iopm_base_pa;
@@ -150,14 +198,14 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
-struct __attribute__ ((__packed__)) vmcb_seg {
+struct vmcb_seg {
u16 selector;
u16 attrib;
u32 limit;
u64 base;
-};
+} __packed;
-struct __attribute__ ((__packed__)) vmcb_save_area {
+struct vmcb_save_area {
struct vmcb_seg es;
struct vmcb_seg cs;
struct vmcb_seg ss;
@@ -200,20 +248,67 @@ struct __attribute__ ((__packed__)) vmcb_save_area {
u64 br_to;
u64 last_excp_from;
u64 last_excp_to;
-};
+ /*
+ * The following part of the save area is valid only for
+ * SEV-ES guests when referenced through the GHCB.
+ */
+ u8 reserved_7[104];
+ u64 reserved_8; /* rax already available at 0x01f8 */
+ u64 rcx;
+ u64 rdx;
+ u64 rbx;
+ u64 reserved_9; /* rsp already available at 0x01d8 */
+ u64 rbp;
+ u64 rsi;
+ u64 rdi;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+ u64 r12;
+ u64 r13;
+ u64 r14;
+ u64 r15;
+ u8 reserved_10[16];
+ u64 sw_exit_code;
+ u64 sw_exit_info_1;
+ u64 sw_exit_info_2;
+ u64 sw_scratch;
+ u8 reserved_11[56];
+ u64 xcr0;
+ u8 valid_bitmap[16];
+ u64 x87_state_gpa;
+} __packed;
+
+struct ghcb {
+ struct vmcb_save_area save;
+ u8 reserved_save[2048 - sizeof(struct vmcb_save_area)];
+
+ u8 shared_buffer[2032];
+
+ u8 reserved_1[10];
+ u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */
+ u32 ghcb_usage;
+} __packed;
+
+
+#define EXPECTED_VMCB_SAVE_AREA_SIZE 1032
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE 256
+#define EXPECTED_GHCB_SIZE PAGE_SIZE
static inline void __unused_size_checks(void)
{
- BUILD_BUG_ON(sizeof(struct vmcb_save_area) != 0x298);
- BUILD_BUG_ON(sizeof(struct vmcb_control_area) != 256);
+ BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
+ BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
}
-struct __attribute__ ((__packed__)) vmcb {
+struct vmcb {
struct vmcb_control_area control;
u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
struct vmcb_save_area save;
-};
+} __packed;
#define SVM_CPUID_FUNC 0x8000000a
@@ -240,32 +335,6 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
#define SVM_SELECTOR_CODE_MASK (1 << 3)
-#define INTERCEPT_CR0_READ 0
-#define INTERCEPT_CR3_READ 3
-#define INTERCEPT_CR4_READ 4
-#define INTERCEPT_CR8_READ 8
-#define INTERCEPT_CR0_WRITE (16 + 0)
-#define INTERCEPT_CR3_WRITE (16 + 3)
-#define INTERCEPT_CR4_WRITE (16 + 4)
-#define INTERCEPT_CR8_WRITE (16 + 8)
-
-#define INTERCEPT_DR0_READ 0
-#define INTERCEPT_DR1_READ 1
-#define INTERCEPT_DR2_READ 2
-#define INTERCEPT_DR3_READ 3
-#define INTERCEPT_DR4_READ 4
-#define INTERCEPT_DR5_READ 5
-#define INTERCEPT_DR6_READ 6
-#define INTERCEPT_DR7_READ 7
-#define INTERCEPT_DR0_WRITE (16 + 0)
-#define INTERCEPT_DR1_WRITE (16 + 1)
-#define INTERCEPT_DR2_WRITE (16 + 2)
-#define INTERCEPT_DR3_WRITE (16 + 3)
-#define INTERCEPT_DR4_WRITE (16 + 4)
-#define INTERCEPT_DR5_WRITE (16 + 5)
-#define INTERCEPT_DR6_WRITE (16 + 6)
-#define INTERCEPT_DR7_WRITE (16 + 7)
-
#define SVM_EVTINJ_VEC_MASK 0xff
#define SVM_EVTINJ_TYPE_SHIFT 8
@@ -298,4 +367,47 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
+/* GHCB Accessor functions */
+
+#define GHCB_BITMAP_IDX(field) \
+ (offsetof(struct vmcb_save_area, field) / sizeof(u64))
+
+#define DEFINE_GHCB_ACCESSORS(field) \
+ static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \
+ { \
+ return test_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&ghcb->save.valid_bitmap); \
+ } \
+ \
+ static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \
+ { \
+ __set_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&ghcb->save.valid_bitmap); \
+ ghcb->save.field = value; \
+ }
+
+DEFINE_GHCB_ACCESSORS(cpl)
+DEFINE_GHCB_ACCESSORS(rip)
+DEFINE_GHCB_ACCESSORS(rsp)
+DEFINE_GHCB_ACCESSORS(rax)
+DEFINE_GHCB_ACCESSORS(rcx)
+DEFINE_GHCB_ACCESSORS(rdx)
+DEFINE_GHCB_ACCESSORS(rbx)
+DEFINE_GHCB_ACCESSORS(rbp)
+DEFINE_GHCB_ACCESSORS(rsi)
+DEFINE_GHCB_ACCESSORS(rdi)
+DEFINE_GHCB_ACCESSORS(r8)
+DEFINE_GHCB_ACCESSORS(r9)
+DEFINE_GHCB_ACCESSORS(r10)
+DEFINE_GHCB_ACCESSORS(r11)
+DEFINE_GHCB_ACCESSORS(r12)
+DEFINE_GHCB_ACCESSORS(r13)
+DEFINE_GHCB_ACCESSORS(r14)
+DEFINE_GHCB_ACCESSORS(r15)
+DEFINE_GHCB_ACCESSORS(sw_exit_code)
+DEFINE_GHCB_ACCESSORS(sw_exit_info_1)
+DEFINE_GHCB_ACCESSORS(sw_exit_info_2)
+DEFINE_GHCB_ACCESSORS(sw_scratch)
+DEFINE_GHCB_ACCESSORS(xcr0)
+
#endif
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index fdb5b356e59b..0fd4a9dfb29c 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -5,6 +5,7 @@
#include <linux/preempt.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
#ifdef CONFIG_X86_32
static inline void iret_to_self(void)
@@ -46,22 +47,34 @@ static inline void iret_to_self(void)
*
* b) Text was modified on a different CPU, may subsequently be
* executed on this CPU, and you want to make sure the new version
- * gets executed. This generally means you're calling this in a IPI.
+ * gets executed. This generally means you're calling this in an IPI.
*
* If you're calling this for a different reason, you're probably doing
* it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
*/
static inline void sync_core(void)
{
/*
- * There are quite a few ways to do this. IRET-to-self is nice
- * because it works on every CPU, at any CPL (so it's compatible
- * with paravirtualization), and it never exits to a hypervisor.
- * The only down sides are that it's a bit slow (it seems to be
- * a bit more than 2x slower than the fastest options) and that
- * it unmasks NMIs. The "push %cs" is needed because, in
- * paravirtual environments, __KERNEL_CS may not be a valid CS
- * value when we do IRET directly.
+ * The SERIALIZE instruction is the most straightforward way to
+ * do this, but it is not universally available.
+ */
+ if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+ serialize();
+ return;
+ }
+
+ /*
+ * For all other processors, there are quite a few ways to do this.
+ * IRET-to-self is nice because it works on every CPU, at any CPL
+ * (so it's compatible with paravirtualization), and it never exits
+ * to a hypervisor. The only downsides are that it's a bit slow
+ * (it seems to be a bit more than 2x slower than the fastest
+ * options) and that it unmasks NMIs. The "push %cs" is needed,
+ * because in paravirtual environments __KERNEL_CS may not be a
+ * valid CS value when we do IRET directly.
*
* In case NMI unmasking or performance ever becomes a problem,
* the next best option appears to be MOV-to-CR2 and an
@@ -71,9 +84,6 @@ static inline void sync_core(void)
* CPUID is the conventional way, but it's nasty: it doesn't
* exist on some 486-like CPUs, and it usually exits to a
* hypervisor.
- *
- * Like all of Linux's memory ordering operations, this is a
- * compiler barrier as well.
*/
iret_to_self();
}
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 6593b42cb379..b7421780e4e9 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -53,6 +53,9 @@ extern void text_poke_finish(void);
#define INT3_INSN_SIZE 1
#define INT3_INSN_OPCODE 0xCC
+#define RET_INSN_SIZE 1
+#define RET_INSN_OPCODE 0xC3
+
#define CALL_INSN_SIZE 5
#define CALL_INSN_OPCODE 0xE8
@@ -73,6 +76,7 @@ static __always_inline int text_opcode_size(u8 opcode)
switch(opcode) {
__CASE(INT3);
+ __CASE(RET);
__CASE(CALL);
__CASE(JMP32);
__CASE(JMP8);
@@ -141,11 +145,26 @@ void int3_emulate_push(struct pt_regs *regs, unsigned long val)
}
static __always_inline
+unsigned long int3_emulate_pop(struct pt_regs *regs)
+{
+ unsigned long val = *(unsigned long *)regs->sp;
+ regs->sp += sizeof(unsigned long);
+ return val;
+}
+
+static __always_inline
void int3_emulate_call(struct pt_regs *regs, unsigned long func)
{
int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
int3_emulate_jmp(regs, func);
}
+
+static __always_inline
+void int3_emulate_ret(struct pt_regs *regs)
+{
+ unsigned long ip = int3_emulate_pop(regs);
+ int3_emulate_jmp(regs, ip);
+}
#endif /* !CONFIG_UML_X86 */
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 267701ae3d86..44733a4bfc42 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -102,7 +102,6 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
#define TIF_X32 30 /* 32-bit native x86-64 binary */
-#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -131,7 +130,6 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32)
-#define _TIF_FSCHECK (1 << TIF_FSCHECK)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h
new file mode 100644
index 000000000000..305bc1214aef
--- /dev/null
+++ b/arch/x86/include/asm/trap_pf.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_TRAP_PF_H
+#define _ASM_X86_TRAP_PF_H
+
+/*
+ * Page fault error code bits:
+ *
+ * bit 0 == 0: no page found 1: protection fault
+ * bit 1 == 0: read access 1: write access
+ * bit 2 == 0: kernel-mode access 1: user-mode access
+ * bit 3 == 1: use of reserved bit detected
+ * bit 4 == 1: fault was an instruction fetch
+ * bit 5 == 1: protection keys block access
+ */
+enum x86_pf_error_code {
+ X86_PF_PROT = 1 << 0,
+ X86_PF_WRITE = 1 << 1,
+ X86_PF_USER = 1 << 2,
+ X86_PF_RSVD = 1 << 3,
+ X86_PF_INSTR = 1 << 4,
+ X86_PF_PK = 1 << 5,
+};
+
+#endif /* _ASM_X86_TRAP_PF_H */
diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h
index 082f45631fa9..f5d2325aa0b7 100644
--- a/arch/x86/include/asm/trapnr.h
+++ b/arch/x86/include/asm/trapnr.h
@@ -26,6 +26,7 @@
#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */
#define X86_TRAP_VE 20 /* Virtualization Exception */
#define X86_TRAP_CP 21 /* Control Protection Exception */
+#define X86_TRAP_VC 29 /* VMM Communication Exception */
#define X86_TRAP_IRET 32 /* IRET Exception */
#endif
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 714b1a30e7b0..7f7200021bd1 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -8,12 +8,14 @@
#include <asm/debugreg.h>
#include <asm/idtentry.h>
#include <asm/siginfo.h> /* TRAP_TRACE, ... */
+#include <asm/trap_pf.h>
#ifdef CONFIG_X86_64
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
void __init trap_init(void);
+asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
#endif
#ifdef CONFIG_X86_F00F_BUG
@@ -35,28 +37,12 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
+bool fault_in_kernel_space(unsigned long address);
+
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
unsigned long fault_address);
#endif
-/*
- * Page fault error code bits:
- *
- * bit 0 == 0: no page found 1: protection fault
- * bit 1 == 0: read access 1: write access
- * bit 2 == 0: kernel-mode access 1: user-mode access
- * bit 3 == 1: use of reserved bit detected
- * bit 4 == 1: fault was an instruction fetch
- * bit 5 == 1: protection keys block access
- */
-enum x86_pf_error_code {
- X86_PF_PROT = 1 << 0,
- X86_PF_WRITE = 1 << 1,
- X86_PF_USER = 1 << 2,
- X86_PF_RSVD = 1 << 3,
- X86_PF_INSTR = 1 << 4,
- X86_PF_PK = 1 << 5,
-};
#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index ecefaffd15d4..c9fa7be3df82 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -13,30 +13,6 @@
#include <asm/extable.h>
/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- */
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#define KERNEL_DS MAKE_MM_SEG(-1UL)
-#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
-
-#define get_fs() (current->thread.addr_limit)
-static inline void set_fs(mm_segment_t fs)
-{
- current->thread.addr_limit = fs;
- /* On user-mode return, check fs is correct */
- set_thread_flag(TIF_FSCHECK);
-}
-
-#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
-#define user_addr_max() (current->thread.addr_limit.seg)
-
-/*
* Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise.
*/
@@ -93,28 +69,17 @@ static inline bool pagefault_disabled(void);
#define access_ok(addr, size) \
({ \
WARN_ON_IN_IRQ(); \
- likely(!__range_not_ok(addr, size, user_addr_max())); \
+ likely(!__range_not_ok(addr, size, TASK_SIZE_MAX)); \
})
-/*
- * These are the main single-value transfer routines. They automatically
- * use the right size if we just have the right pointer type.
- *
- * This gets kind of ugly. We want to return _two_ values in "get_user()"
- * and yet we don't want to do any pointers, because that is too much
- * of a performance impact. Thus we have a few rather ugly macros here,
- * and hide all the ugliness from the user.
- *
- * The "__xxx" versions of the user access functions are versions that
- * do not verify the address space, that must have been done previously
- * with a separate "access_ok()" call (this is used when we do multiple
- * accesses to the same area of user memory).
- */
-
extern int __get_user_1(void);
extern int __get_user_2(void);
extern int __get_user_4(void);
extern int __get_user_8(void);
+extern int __get_user_nocheck_1(void);
+extern int __get_user_nocheck_2(void);
+extern int __get_user_nocheck_4(void);
+extern int __get_user_nocheck_8(void);
extern int __get_user_bad(void);
#define __uaccess_begin() stac()
@@ -138,25 +103,12 @@ extern int __get_user_bad(void);
#define __typefits(x,type,not) \
__builtin_choose_expr(sizeof(x)<=sizeof(type),(unsigned type)0,not)
-/**
- * get_user - Get a simple variable from user space.
- * @x: Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * This macro copies a single simple variable from user space to kernel
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Return: zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
/*
+ * This is used for both get_user() and __get_user() to expand to
+ * the proper special function call that has odd calling conventions
+ * due to returning both a value and an error, and that depends on
+ * the size of the pointer passed in.
+ *
* Careful: we have to cast the result to the type of the pointer
* for sign reasons.
*
@@ -169,13 +121,12 @@ extern int __get_user_bad(void);
* Clang/LLVM cares about the size of the register, but still wants
* the base register for something that ends up being a pair.
*/
-#define get_user(x, ptr) \
+#define do_get_user_call(fn,x,ptr) \
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
- might_fault(); \
- asm volatile("call __get_user_%P4" \
+ asm volatile("call __" #fn "_%P4" \
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
@@ -183,10 +134,48 @@ extern int __get_user_bad(void);
__builtin_expect(__ret_gu, 0); \
})
-#define __put_user_x(size, x, ptr, __ret_pu) \
- asm volatile("call __put_user_" #size : "=a" (__ret_pu) \
- : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
+/**
+ * get_user - Get a simple variable from user space.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Return: zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); })
+/**
+ * __get_user - Get a simple variable from user space, with less checking.
+ * @x: Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Return: zero on success, or -EFAULT on error.
+ * On error, the variable @x is set to zero.
+ */
+#define __get_user(x,ptr) do_get_user_call(get_user_nocheck,x,ptr)
#ifdef CONFIG_X86_32
@@ -199,25 +188,49 @@ extern int __get_user_bad(void);
: : "A" (x), "r" (addr) \
: : label)
-#define __put_user_x8(x, ptr, __ret_pu) \
- asm volatile("call __put_user_8" : "=a" (__ret_pu) \
- : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
#else
#define __put_user_goto_u64(x, ptr, label) \
__put_user_goto(x, ptr, "q", "er", label)
-#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
#endif
extern void __put_user_bad(void);
/*
* Strange magic calling convention: pointer in %ecx,
- * value in %eax(:%edx), return value in %eax. clobbers %rbx
+ * value in %eax(:%edx), return value in %ecx. clobbers %rbx
*/
extern void __put_user_1(void);
extern void __put_user_2(void);
extern void __put_user_4(void);
extern void __put_user_8(void);
+extern void __put_user_nocheck_1(void);
+extern void __put_user_nocheck_2(void);
+extern void __put_user_nocheck_4(void);
+extern void __put_user_nocheck_8(void);
+
+/*
+ * ptr must be evaluated and assigned to the temporary __ptr_pu before
+ * the assignment of x to __val_pu, to avoid any function calls
+ * involved in the ptr expression (possibly implicitly generated due
+ * to KASAN) from clobbering %ax.
+ */
+#define do_put_user_call(fn,x,ptr) \
+({ \
+ int __ret_pu; \
+ void __user *__ptr_pu; \
+ register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \
+ __chk_user_ptr(ptr); \
+ __ptr_pu = (ptr); \
+ __val_pu = (x); \
+ asm volatile("call __" #fn "_%P[size]" \
+ : "=c" (__ret_pu), \
+ ASM_CALL_CONSTRAINT \
+ : "0" (__ptr_pu), \
+ "r" (__val_pu), \
+ [size] "i" (sizeof(*(ptr))) \
+ :"ebx"); \
+ __builtin_expect(__ret_pu, 0); \
+})
/**
* put_user - Write a simple value into user space.
@@ -236,32 +249,29 @@ extern void __put_user_8(void);
*
* Return: zero on success, or -EFAULT on error.
*/
-#define put_user(x, ptr) \
-({ \
- int __ret_pu; \
- __typeof__(*(ptr)) __pu_val; \
- __chk_user_ptr(ptr); \
- might_fault(); \
- __pu_val = x; \
- switch (sizeof(*(ptr))) { \
- case 1: \
- __put_user_x(1, __pu_val, ptr, __ret_pu); \
- break; \
- case 2: \
- __put_user_x(2, __pu_val, ptr, __ret_pu); \
- break; \
- case 4: \
- __put_user_x(4, __pu_val, ptr, __ret_pu); \
- break; \
- case 8: \
- __put_user_x8(__pu_val, ptr, __ret_pu); \
- break; \
- default: \
- __put_user_x(X, __pu_val, ptr, __ret_pu); \
- break; \
- } \
- __builtin_expect(__ret_pu, 0); \
-})
+#define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); })
+
+/**
+ * __put_user - Write a simple value into user space, with less checking.
+ * @x: Value to copy to user space.
+ * @ptr: Destination address, in user space.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * This macro copies a single simple value from kernel space to user
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and @x must be assignable
+ * to the result of dereferencing @ptr.
+ *
+ * Caller must check the pointer with access_ok() before calling this
+ * function.
+ *
+ * Return: zero on success, or -EFAULT on error.
+ */
+#define __put_user(x, ptr) do_put_user_call(put_user_nocheck,x,ptr)
#define __put_user_size(x, ptr, size, label) \
do { \
@@ -284,6 +294,55 @@ do { \
} \
} while (0)
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+#ifdef CONFIG_X86_32
+#define __get_user_asm_u64(x, ptr, label) do { \
+ unsigned int __gu_low, __gu_high; \
+ const unsigned int __user *__gu_ptr; \
+ __gu_ptr = (const void __user *)(ptr); \
+ __get_user_asm(__gu_low, ptr, "l", "=r", label); \
+ __get_user_asm(__gu_high, ptr+1, "l", "=r", label); \
+ (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \
+} while (0)
+#else
+#define __get_user_asm_u64(x, ptr, label) \
+ __get_user_asm(x, ptr, "q", "=r", label)
+#endif
+
+#define __get_user_size(x, ptr, size, label) \
+do { \
+ __chk_user_ptr(ptr); \
+ switch (size) { \
+ unsigned char x_u8__; \
+ case 1: \
+ __get_user_asm(x_u8__, ptr, "b", "=q", label); \
+ (x) = x_u8__; \
+ break; \
+ case 2: \
+ __get_user_asm(x, ptr, "w", "=r", label); \
+ break; \
+ case 4: \
+ __get_user_asm(x, ptr, "l", "=r", label); \
+ break; \
+ case 8: \
+ __get_user_asm_u64(x, ptr, label); \
+ break; \
+ default: \
+ (x) = __get_user_bad(); \
+ } \
+} while (0)
+
+#define __get_user_asm(x, addr, itype, ltype, label) \
+ asm_volatile_goto("\n" \
+ "1: mov"itype" %[umem],%[output]\n" \
+ _ASM_EXTABLE_UA(1b, %l2) \
+ : [output] ltype(x) \
+ : [umem] "m" (__m(addr)) \
+ : : label)
+
+#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
#ifdef CONFIG_X86_32
#define __get_user_asm_u64(x, ptr, retval) \
({ \
@@ -343,7 +402,7 @@ do { \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: mov %[efault],%[errout]\n" \
- " xor"itype" %[output],%[output]\n" \
+ " xorl %k[output],%k[output]\n" \
" jmp 2b\n" \
".previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \
@@ -352,33 +411,7 @@ do { \
: [umem] "m" (__m(addr)), \
[efault] "i" (-EFAULT), "0" (err))
-#define __put_user_nocheck(x, ptr, size) \
-({ \
- __label__ __pu_label; \
- int __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __pu_val = (x); \
- __typeof__(ptr) __pu_ptr = (ptr); \
- __typeof__(size) __pu_size = (size); \
- __uaccess_begin(); \
- __put_user_size(__pu_val, __pu_ptr, __pu_size, __pu_label); \
- __pu_err = 0; \
-__pu_label: \
- __uaccess_end(); \
- __builtin_expect(__pu_err, 0); \
-})
-
-#define __get_user_nocheck(x, ptr, size) \
-({ \
- int __gu_err; \
- __inttype(*(ptr)) __gu_val; \
- __typeof__(ptr) __gu_ptr = (ptr); \
- __typeof__(size) __gu_size = (size); \
- __uaccess_begin_nospec(); \
- __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err); \
- __uaccess_end(); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
- __builtin_expect(__gu_err, 0); \
-})
+#endif // CONFIG_CC_ASM_GOTO_OUTPUT
/* FIXME: this hack is definitely wrong -AK */
struct __large_struct { unsigned long buf[100]; };
@@ -396,55 +429,6 @@ struct __large_struct { unsigned long buf[100]; };
: : ltype(x), "m" (__m(addr)) \
: : label)
-/**
- * __get_user - Get a simple variable from user space, with less checking.
- * @x: Variable to store result.
- * @ptr: Source address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * This macro copies a single simple variable from user space to kernel
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and the result of
- * dereferencing @ptr must be assignable to @x without a cast.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Return: zero on success, or -EFAULT on error.
- * On error, the variable @x is set to zero.
- */
-
-#define __get_user(x, ptr) \
- __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
-
-/**
- * __put_user - Write a simple value into user space, with less checking.
- * @x: Value to copy to user space.
- * @ptr: Destination address, in user space.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * This macro copies a single simple value from kernel space to user
- * space. It supports simple types like char and int, but not larger
- * data types like structures or arrays.
- *
- * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
- *
- * Caller must check the pointer with access_ok() before calling this
- * function.
- *
- * Return: zero on success, or -EFAULT on error.
- */
-
-#define __put_user(x, ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
extern unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
extern __must_check long
@@ -455,6 +439,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned len);
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+unsigned long __must_check
+copy_mc_to_user(void *to, const void *from, unsigned len);
+#endif
+
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
@@ -494,6 +487,14 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt
#define unsafe_put_user(x, ptr, label) \
__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define unsafe_get_user(x, ptr, err_label) \
+do { \
+ __inttype(*(ptr)) __gu_val; \
+ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), err_label); \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
+} while (0)
+#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
#define unsafe_get_user(x, ptr, err_label) \
do { \
int __gu_err; \
@@ -502,6 +503,7 @@ do { \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
if (unlikely(__gu_err)) goto err_label; \
} while (0)
+#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/*
* We want the unsafe accessors to always be inlined and use
@@ -528,6 +530,11 @@ do { \
#define HAVE_GET_KERNEL_NOFAULT
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_kernel_nofault(dst, src, type, err_label) \
+ __get_user_size(*((type *)(dst)), (__force type __user *)(src), \
+ sizeof(type), err_label)
+#else // !CONFIG_CC_HAS_ASM_GOTO_OUTPUT
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
int __kr_err; \
@@ -537,6 +544,7 @@ do { \
if (unlikely(__kr_err)) \
goto err_label; \
} while (0)
+#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
#define __put_kernel_nofault(dst, src, type, err_label) \
__put_user_size(*((type *)(src)), (__force type __user *)(dst), \
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index bc10e3dc64fe..e7265a552f4f 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -47,22 +47,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
}
static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
-{
- unsigned long ret;
-
- __uaccess_begin();
- /*
- * Note, __memcpy_mcsafe() is explicitly used since it can
- * handle exceptions / faults. memcpy_mcsafe() may fall back to
- * memcpy() which lacks this handling.
- */
- ret = __memcpy_mcsafe(to, from, len);
- __uaccess_end();
- return ret;
-}
-
-static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
return copy_user_generic(dst, (__force void *)src, size);
@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
-
-unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len);
-
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 7d903fdb3f43..664d4610d700 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -1,51 +1,17 @@
#ifndef _ASM_X86_UNWIND_HINTS_H
#define _ASM_X86_UNWIND_HINTS_H
+#include <linux/objtool.h>
+
#include "orc_types.h"
#ifdef __ASSEMBLY__
-/*
- * In asm, there are two kinds of code: normal C-type callable functions and
- * the rest. The normal callable functions can be called by other code, and
- * don't do anything unusual with the stack. Such normal callable functions
- * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
- * category. In this case, no special debugging annotations are needed because
- * objtool can automatically generate the ORC data for the ORC unwinder to read
- * at runtime.
- *
- * Anything which doesn't fall into the above category, such as syscall and
- * interrupt handlers, tends to not be called directly by other functions, and
- * often does unusual non-C-function-type things with the stack pointer. Such
- * code needs to be annotated such that objtool can understand it. The
- * following CFI hint macros are for this type of code.
- *
- * These macros provide hints to objtool about the state of the stack at each
- * instruction. Objtool starts from the hints and follows the code flow,
- * making automatic CFI adjustments when it sees pushes and pops, filling out
- * the debuginfo as necessary. It will also warn if it sees any
- * inconsistencies.
- */
-.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0
-#ifdef CONFIG_STACK_VALIDATION
-.Lunwind_hint_ip_\@:
- .pushsection .discard.unwind_hints
- /* struct unwind_hint */
- .long .Lunwind_hint_ip_\@ - .
- .short \sp_offset
- .byte \sp_reg
- .byte \type
- .byte \end
- .balign 4
- .popsection
-#endif
-.endm
-
.macro UNWIND_HINT_EMPTY
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1
+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
.endm
-.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
.if \base == %rsp
.if \indirect
.set sp_reg, ORC_REG_SP_INDIRECT
@@ -66,24 +32,24 @@
.set sp_offset, \offset
- .if \iret
- .set type, ORC_TYPE_REGS_IRET
+ .if \partial
+ .set type, UNWIND_HINT_TYPE_REGS_PARTIAL
.elseif \extra == 0
- .set type, ORC_TYPE_REGS_IRET
+ .set type, UNWIND_HINT_TYPE_REGS_PARTIAL
.set sp_offset, \offset + (16*8)
.else
- .set type, ORC_TYPE_REGS
+ .set type, UNWIND_HINT_TYPE_REGS
.endif
UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
.endm
.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
- UNWIND_HINT_REGS base=\base offset=\offset iret=1
+ UNWIND_HINT_REGS base=\base offset=\offset partial=1
.endm
.macro UNWIND_HINT_FUNC sp_offset=8
- UNWIND_HINT sp_offset=\sp_offset
+ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL
.endm
/*
@@ -92,7 +58,7 @@
* initial_func_cfi.
*/
.macro UNWIND_HINT_RET_OFFSET sp_offset=8
- UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
+ UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
.endm
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 70050d0136c3..08b3d810dfba 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -5,8 +5,9 @@
/*
* UV BIOS layer definitions.
*
- * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Russ Anderson <rja@sgi.com>
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
*/
#include <linux/rtc.h>
@@ -71,6 +72,11 @@ struct uv_gam_range_entry {
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
};
+#define UV_AT_SIZE 8 /* 7 character arch type + NULL char */
+struct uv_arch_type_entry {
+ char archtype[UV_AT_SIZE];
+};
+
#define UV_SYSTAB_SIG "UVST"
#define UV_SYSTAB_VERSION_1 1 /* UV2/3 BIOS version */
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
@@ -79,10 +85,14 @@ struct uv_gam_range_entry {
#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3
+#define UV_SYSTAB_VERSION_UV5 0x500 /* UV5 GAM base version */
+#define UV_SYSTAB_VERSION_UV5_LATEST UV_SYSTAB_VERSION_UV5
+
#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */
#define UV_SYSTAB_TYPE_GAM_RNG_TBL 2 /* GAM entry table */
-#define UV_SYSTAB_TYPE_MAX 3
+#define UV_SYSTAB_TYPE_ARCH_TYPE 3 /* UV arch type */
+#define UV_SYSTAB_TYPE_MAX 4
/*
* The UV system table describes specific firmware
@@ -133,6 +143,7 @@ extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
extern int uv_bios_init(void);
+extern unsigned long get_uv_systab_phys(bool msg);
extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e48aea9ba47d..172d3e4a9e4b 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -35,10 +35,8 @@ extern int is_uv_hubbed(int uvtype);
extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
extern void uv_system_init(void);
-extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info);
-#else /* X86_UV */
+#else /* !X86_UV */
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline bool is_early_uv_system(void) { return 0; }
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
deleted file mode 100644
index cd24804955d7..000000000000
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ /dev/null
@@ -1,755 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * SGI UV Broadcast Assist Unit definitions
- *
- * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved.
- */
-
-#ifndef _ASM_X86_UV_UV_BAU_H
-#define _ASM_X86_UV_UV_BAU_H
-
-#include <linux/bitmap.h>
-#include <asm/idtentry.h>
-
-#define BITSPERBYTE 8
-
-/*
- * Broadcast Assist Unit messaging structures
- *
- * Selective Broadcast activations are induced by software action
- * specifying a particular 8-descriptor "set" via a 6-bit index written
- * to an MMR.
- * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
- * each 6-bit index value. These descriptor sets are mapped in sequence
- * starting with set 0 located at the address specified in the
- * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
- * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
- *
- * We will use one set for sending BAU messages from each of the
- * cpu's on the uvhub.
- *
- * TLB shootdown will use the first of the 8 descriptors of each set.
- * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
- */
-
-#define MAX_CPUS_PER_UVHUB 128
-#define MAX_CPUS_PER_SOCKET 64
-#define ADP_SZ 64 /* hardware-provided max. */
-#define UV_CPUS_PER_AS 32 /* hardware-provided max. */
-#define ITEMS_PER_DESC 8
-/* the 'throttle' to prevent the hardware stay-busy bug */
-#define MAX_BAU_CONCURRENT 3
-#define UV_ACT_STATUS_MASK 0x3
-#define UV_ACT_STATUS_SIZE 2
-#define UV_DISTRIBUTION_SIZE 256
-#define UV_SW_ACK_NPENDING 8
-#define UV_NET_ENDPOINT_INTD 0x28
-#define UV_PAYLOADQ_GNODE_SHIFT 49
-#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
-#define UV_BAU_BASENAME "sgi_uv/bau_tunables"
-#define UV_BAU_TUNABLES_DIR "sgi_uv"
-#define UV_BAU_TUNABLES_FILE "bau_tunables"
-#define WHITESPACE " \t\n"
-#define cpubit_isset(cpu, bau_local_cpumask) \
- test_bit((cpu), (bau_local_cpumask).bits)
-
-/* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */
-/*
- * UV2: Bit 19 selects between
- * (0): 10 microsecond timebase and
- * (1): 80 microseconds
- * we're using 560us
- */
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL)
-/* assuming UV3 is the same */
-
-#define BAU_MISC_CONTROL_MULT_MASK 3
-
-#define UVH_AGING_PRESCALE_SEL 0x000000b000UL
-/* [30:28] URGENCY_7 an index into a table of times */
-#define BAU_URGENCY_7_SHIFT 28
-#define BAU_URGENCY_7_MASK 7
-
-#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL
-/* [45:40] BAU - BAU transaction timeout select - a multiplier */
-#define BAU_TRANS_SHIFT 40
-#define BAU_TRANS_MASK 0x3f
-
-/*
- * shorten some awkward names
- */
-#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT
-#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT
-#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
-#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD
-#define PREFETCH_HINT_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT
-#define SB_STATUS_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
-#define write_gmmr uv_write_global_mmr64
-#define write_lmmr uv_write_local_mmr
-#define read_lmmr uv_read_local_mmr
-#define read_gmmr uv_read_global_mmr64
-
-/*
- * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
- */
-#define DS_IDLE 0
-#define DS_ACTIVE 1
-#define DS_DESTINATION_TIMEOUT 2
-#define DS_SOURCE_TIMEOUT 3
-/*
- * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2
- * values 1 and 3 will not occur
- * Decoded meaning ERROR BUSY AUX ERR
- * ------------------------------- ---- ----- -------
- * IDLE 0 0 0
- * BUSY (active) 0 1 0
- * SW Ack Timeout (destination) 1 0 0
- * SW Ack INTD rejected (strong NACK) 1 0 1
- * Source Side Time Out Detected 1 1 0
- * Destination Side PUT Failed 1 1 1
- */
-#define UV2H_DESC_IDLE 0
-#define UV2H_DESC_BUSY 2
-#define UV2H_DESC_DEST_TIMEOUT 4
-#define UV2H_DESC_DEST_STRONG_NACK 5
-#define UV2H_DESC_SOURCE_TIMEOUT 6
-#define UV2H_DESC_DEST_PUT_ERR 7
-
-/*
- * delay for 'plugged' timeout retries, in microseconds
- */
-#define PLUGGED_DELAY 10
-
-/*
- * threshholds at which to use IPI to free resources
- */
-/* after this # consecutive 'plugged' timeouts, use IPI to release resources */
-#define PLUGSB4RESET 100
-/* after this many consecutive timeouts, use IPI to release resources */
-#define TIMEOUTSB4RESET 1
-/* at this number uses of IPI to release resources, giveup the request */
-#define IPI_RESET_LIMIT 1
-/* after this # consecutive successes, bump up the throttle if it was lowered */
-#define COMPLETE_THRESHOLD 5
-/* after this # of giveups (fall back to kernel IPI's) disable the use of
- the BAU for a period of time */
-#define GIVEUP_LIMIT 100
-
-#define UV_LB_SUBNODEID 0x10
-
-#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
-#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK
-/* 4 bits of software ack period */
-#define UV2_ACK_MASK 0x7UL
-#define UV2_ACK_UNITS_SHFT 3
-#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
-
-/*
- * number of entries in the destination side payload queue
- */
-#define DEST_Q_SIZE 20
-/*
- * number of destination side software ack resources
- */
-#define DEST_NUM_RESOURCES 8
-/*
- * completion statuses for sending a TLB flush message
- */
-#define FLUSH_RETRY_PLUGGED 1
-#define FLUSH_RETRY_TIMEOUT 2
-#define FLUSH_GIVEUP 3
-#define FLUSH_COMPLETE 4
-
-/*
- * tuning the action when the numalink network is extremely delayed
- */
-#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in
- microseconds */
-#define CONGESTED_REPS 10 /* long delays averaged over
- this many broadcasts */
-#define DISABLED_PERIOD 10 /* time for the bau to be
- disabled, in seconds */
-/* see msg_type: */
-#define MSG_NOOP 0
-#define MSG_REGULAR 1
-#define MSG_RETRY 2
-
-#define BAU_DESC_QUALIFIER 0x534749
-
-enum uv_bau_version {
- UV_BAU_V2 = 2,
- UV_BAU_V3,
- UV_BAU_V4,
-};
-
-/*
- * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
- * If the 'multilevel' flag in the header portion of the descriptor
- * has been set to 0, then endpoint multi-unicast mode is selected.
- * The distribution specification (32 bytes) is interpreted as a 256-bit
- * distribution vector. Adjacent bits correspond to consecutive even numbered
- * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nasid' field of the header corresponds to the
- * destination nodeID associated with that specified bit.
- */
-struct pnmask {
- unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
-};
-
-/*
- * mask of cpu's on a uvhub
- * (during initialization we need to check that unsigned long has
- * enough bits for max. cpu's per uvhub)
- */
-struct bau_local_cpumask {
- unsigned long bits;
-};
-
-/*
- * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor)
- * only 12 bytes (96 bits) of the payload area are usable.
- * An additional 3 bytes (bits 27:4) of the header address are carried
- * to the next bytes of the destination payload queue.
- * And an additional 2 bytes of the header Suppl_A field are also
- * carried to the destination payload queue.
- * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte)
- * of the destination payload queue, which is written by the hardware
- * with the s/w ack resource bit vector.
- * [ effective message contents (16 bytes (128 bits) maximum), not counting
- * the s/w ack bit vector ]
- */
-
-/**
- * struct uv2_3_bau_msg_payload - defines payload for INTD transactions
- * @address: Signifies a page or all TLB's of the cpu
- * @sending_cpu: CPU from which the message originates
- * @acknowledge_count: CPUs on the destination Hub that received the interrupt
- */
-struct uv2_3_bau_msg_payload {
- u64 address;
- u16 sending_cpu;
- u16 acknowledge_count;
-};
-
-/**
- * struct uv4_bau_msg_payload - defines payload for INTD transactions
- * @address: Signifies a page or all TLB's of the cpu
- * @sending_cpu: CPU from which the message originates
- * @acknowledge_count: CPUs on the destination Hub that received the interrupt
- * @qualifier: Set by source to verify origin of INTD broadcast
- */
-struct uv4_bau_msg_payload {
- u64 address;
- u16 sending_cpu;
- u16 acknowledge_count;
- u32 reserved:8;
- u32 qualifier:24;
-};
-
-/*
- * UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
- * see figure 9-2 of harp_sys.pdf
- * assuming UV3 is the same
- */
-struct uv2_3_bau_msg_header {
- unsigned int base_dest_nasid:15; /* nasid of the first bit */
- /* bits 14:0 */ /* in uvhub map */
- unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */
- /* bits 19:15 */
- unsigned int rsvd_1:1; /* must be zero */
- /* bit 20 */
- /* Address bits 59:21 */
- /* bits 25:2 of address (44:21) are payload */
- /* these next 24 bits become bytes 12-14 of msg */
- /* bits 28:21 land in byte 12 */
- unsigned int replied_to:1; /* sent as 0 by the source to
- byte 12 */
- /* bit 21 */
- unsigned int msg_type:3; /* software type of the
- message */
- /* bits 24:22 */
- unsigned int canceled:1; /* message canceled, resource
- is to be freed*/
- /* bit 25 */
- unsigned int payload_1:3; /* not currently used */
- /* bits 28:26 */
-
- /* bits 36:29 land in byte 13 */
- unsigned int payload_2a:3; /* not currently used */
- unsigned int payload_2b:5; /* not currently used */
- /* bits 36:29 */
-
- /* bits 44:37 land in byte 14 */
- unsigned int payload_3:8; /* not currently used */
- /* bits 44:37 */
-
- unsigned int rsvd_2:7; /* reserved */
- /* bits 51:45 */
- unsigned int swack_flag:1; /* software acknowledge flag */
- /* bit 52 */
- unsigned int rsvd_3a:3; /* must be zero */
- unsigned int rsvd_3b:8; /* must be zero */
- unsigned int rsvd_3c:8; /* must be zero */
- unsigned int rsvd_3d:3; /* must be zero */
- /* bits 74:53 */
- unsigned int fairness:3; /* usually zero */
- /* bits 77:75 */
-
- unsigned int sequence:16; /* message sequence number */
- /* bits 93:78 Suppl_A */
- unsigned int chaining:1; /* next descriptor is part of
- this activation*/
- /* bit 94 */
- unsigned int multilevel:1; /* multi-level multicast
- format */
- /* bit 95 */
- unsigned int rsvd_4:24; /* ordered / source node /
- source subnode / aging
- must be zero */
- /* bits 119:96 */
- unsigned int command:8; /* message type */
- /* bits 127:120 */
-};
-
-/*
- * The activation descriptor:
- * The format of the message to send, plus all accompanying control
- * Should be 64 bytes
- */
-struct bau_desc {
- struct pnmask distribution;
- /*
- * message template, consisting of header and payload:
- */
- union bau_msg_header {
- struct uv2_3_bau_msg_header uv2_3_hdr;
- } header;
-
- union bau_payload_header {
- struct uv2_3_bau_msg_payload uv2_3;
- struct uv4_bau_msg_payload uv4;
- } payload;
-};
-/* UV2:
- * -payload-- ---------header------
- * bytes 0-11 bits 70-78 bits 21-44
- * A B (2) C (3)
- *
- * A/B/C are moved to:
- * A C B
- * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
- * ------------payload queue-----------
- */
-
-/*
- * The payload queue on the destination side is an array of these.
- * With BAU_MISC_CONTROL set for software acknowledge mode, the messages
- * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
- * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
- * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
- * swack_vec and payload_2)
- * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
- * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
- * operation."
- */
-struct bau_pq_entry {
- unsigned long address; /* signifies a page or all TLB's
- of the cpu */
- /* 64 bits, bytes 0-7 */
- unsigned short sending_cpu; /* cpu that sent the message */
- /* 16 bits, bytes 8-9 */
- unsigned short acknowledge_count; /* filled in by destination */
- /* 16 bits, bytes 10-11 */
- /* these next 3 bytes come from bits 58-81 of the message header */
- unsigned short replied_to:1; /* sent as 0 by the source */
- unsigned short msg_type:3; /* software message type */
- unsigned short canceled:1; /* sent as 0 by the source */
- unsigned short unused1:3; /* not currently using */
- /* byte 12 */
- unsigned char unused2a; /* not currently using */
- /* byte 13 */
- unsigned char unused2; /* not currently using */
- /* byte 14 */
- unsigned char swack_vec; /* filled in by the hardware */
- /* byte 15 (bits 127:120) */
- unsigned short sequence; /* message sequence number */
- /* bytes 16-17 */
- unsigned char unused4[2]; /* not currently using bytes 18-19 */
- /* bytes 18-19 */
- int number_of_cpus; /* filled in at destination */
- /* 32 bits, bytes 20-23 (aligned) */
- unsigned char unused5[8]; /* not using */
- /* bytes 24-31 */
-};
-
-struct msg_desc {
- struct bau_pq_entry *msg;
- int msg_slot;
- struct bau_pq_entry *queue_first;
- struct bau_pq_entry *queue_last;
-};
-
-struct reset_args {
- int sender;
-};
-
-/*
- * This structure is allocated per_cpu for UV TLB shootdown statistics.
- */
-struct ptc_stats {
- /* sender statistics */
- unsigned long s_giveup; /* number of fall backs to
- IPI-style flushes */
- unsigned long s_requestor; /* number of shootdown
- requests */
- unsigned long s_stimeout; /* source side timeouts */
- unsigned long s_dtimeout; /* destination side timeouts */
- unsigned long s_strongnacks; /* number of strong nack's */
- unsigned long s_time; /* time spent in sending side */
- unsigned long s_retriesok; /* successful retries */
- unsigned long s_ntargcpu; /* total number of cpu's
- targeted */
- unsigned long s_ntargself; /* times the sending cpu was
- targeted */
- unsigned long s_ntarglocals; /* targets of cpus on the local
- blade */
- unsigned long s_ntargremotes; /* targets of cpus on remote
- blades */
- unsigned long s_ntarglocaluvhub; /* targets of the local hub */
- unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */
- unsigned long s_ntarguvhub; /* total number of uvhubs
- targeted */
- unsigned long s_ntarguvhub16; /* number of times target
- hubs >= 16*/
- unsigned long s_ntarguvhub8; /* number of times target
- hubs >= 8 */
- unsigned long s_ntarguvhub4; /* number of times target
- hubs >= 4 */
- unsigned long s_ntarguvhub2; /* number of times target
- hubs >= 2 */
- unsigned long s_ntarguvhub1; /* number of times target
- hubs == 1 */
- unsigned long s_resets_plug; /* ipi-style resets from plug
- state */
- unsigned long s_resets_timeout; /* ipi-style resets from
- timeouts */
- unsigned long s_busy; /* status stayed busy past
- s/w timer */
- unsigned long s_throttles; /* waits in throttle */
- unsigned long s_retry_messages; /* retry broadcasts */
- unsigned long s_bau_reenabled; /* for bau enable/disable */
- unsigned long s_bau_disabled; /* for bau enable/disable */
- unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
- unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
- unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
- unsigned long s_overipilimit; /* over the ipi reset limit */
- unsigned long s_giveuplimit; /* disables, over giveup limit*/
- unsigned long s_enters; /* entries to the driver */
- unsigned long s_ipifordisabled; /* fall back to IPI; disabled */
- unsigned long s_plugged; /* plugged by h/w bug*/
- unsigned long s_congested; /* giveup on long wait */
- /* destination statistics */
- unsigned long d_alltlb; /* times all tlb's on this
- cpu were flushed */
- unsigned long d_onetlb; /* times just one tlb on this
- cpu was flushed */
- unsigned long d_multmsg; /* interrupts with multiple
- messages */
- unsigned long d_nomsg; /* interrupts with no message */
- unsigned long d_time; /* time spent on destination
- side */
- unsigned long d_requestee; /* number of messages
- processed */
- unsigned long d_retries; /* number of retry messages
- processed */
- unsigned long d_canceled; /* number of messages canceled
- by retries */
- unsigned long d_nocanceled; /* retries that found nothing
- to cancel */
- unsigned long d_resets; /* number of ipi-style requests
- processed */
- unsigned long d_rcanceled; /* number of messages canceled
- by resets */
-};
-
-struct tunables {
- int *tunp;
- int deflt;
-};
-
-struct hub_and_pnode {
- short uvhub;
- short pnode;
-};
-
-struct socket_desc {
- short num_cpus;
- short cpu_number[MAX_CPUS_PER_SOCKET];
-};
-
-struct uvhub_desc {
- unsigned short socket_mask;
- short num_cpus;
- short uvhub;
- short pnode;
- struct socket_desc socket[2];
-};
-
-/**
- * struct bau_control
- * @status_mmr: location of status mmr, determined by uvhub_cpu
- * @status_index: index of ERR|BUSY bits in status mmr, determined by uvhub_cpu
- *
- * Per-cpu control struct containing CPU topology information and BAU tuneables.
- */
-struct bau_control {
- struct bau_desc *descriptor_base;
- struct bau_pq_entry *queue_first;
- struct bau_pq_entry *queue_last;
- struct bau_pq_entry *bau_msg_head;
- struct bau_control *uvhub_master;
- struct bau_control *socket_master;
- struct ptc_stats *statp;
- cpumask_t *cpumask;
- unsigned long timeout_interval;
- unsigned long set_bau_on_time;
- atomic_t active_descriptor_count;
- int plugged_tries;
- int timeout_tries;
- int ipi_attempts;
- int conseccompletes;
- u64 status_mmr;
- int status_index;
- bool nobau;
- short baudisabled;
- short cpu;
- short osnode;
- short uvhub_cpu;
- short uvhub;
- short uvhub_version;
- short cpus_in_socket;
- short cpus_in_uvhub;
- short partition_base_pnode;
- short busy; /* all were busy (war) */
- unsigned short message_number;
- unsigned short uvhub_quiesce;
- short socket_acknowledge_count[DEST_Q_SIZE];
- cycles_t send_message;
- cycles_t period_end;
- cycles_t period_time;
- spinlock_t uvhub_lock;
- spinlock_t queue_lock;
- spinlock_t disable_lock;
- /* tunables */
- int max_concurr;
- int max_concurr_const;
- int plugged_delay;
- int plugsb4reset;
- int timeoutsb4reset;
- int ipi_reset_limit;
- int complete_threshold;
- int cong_response_us;
- int cong_reps;
- cycles_t disabled_period;
- int period_giveups;
- int giveup_limit;
- long period_requests;
- struct hub_and_pnode *thp;
-};
-
-/* Abstracted BAU functions */
-struct bau_operations {
- unsigned long (*read_l_sw_ack)(void);
- unsigned long (*read_g_sw_ack)(int pnode);
- unsigned long (*bau_gpa_to_offset)(unsigned long vaddr);
- void (*write_l_sw_ack)(unsigned long mmr);
- void (*write_g_sw_ack)(int pnode, unsigned long mmr);
- void (*write_payload_first)(int pnode, unsigned long mmr);
- void (*write_payload_last)(int pnode, unsigned long mmr);
- int (*wait_completion)(struct bau_desc*,
- struct bau_control*, long try);
-};
-
-static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image);
-}
-
-static inline void write_mmr_descriptor_base(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image);
-}
-
-static inline void write_mmr_activation(unsigned long index)
-{
- write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-}
-
-static inline void write_gmmr_activation(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image);
-}
-
-static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image);
-}
-
-static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image);
-}
-
-static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image);
-}
-
-static inline void write_mmr_payload_tail(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image);
-}
-
-static inline void write_mmr_payload_last(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image);
-}
-
-static inline void write_mmr_misc_control(int pnode, unsigned long mmr_image)
-{
- write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
-}
-
-static inline unsigned long read_mmr_misc_control(int pnode)
-{
- return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL);
-}
-
-static inline void write_mmr_sw_ack(unsigned long mr)
-{
- uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
-}
-
-static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
-{
- write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
-}
-
-static inline unsigned long read_mmr_sw_ack(void)
-{
- return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-}
-
-static inline unsigned long read_gmmr_sw_ack(int pnode)
-{
- return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-}
-
-static inline void write_mmr_proc_sw_ack(unsigned long mr)
-{
- uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
-}
-
-static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr)
-{
- write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
-}
-
-static inline unsigned long read_mmr_proc_sw_ack(void)
-{
- return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
-}
-
-static inline unsigned long read_gmmr_proc_sw_ack(int pnode)
-{
- return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
-}
-
-static inline void write_mmr_data_config(int pnode, unsigned long mr)
-{
- uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr);
-}
-
-static inline int bau_uvhub_isset(int uvhub, struct pnmask *dstp)
-{
- return constant_test_bit(uvhub, &dstp->bits[0]);
-}
-static inline void bau_uvhub_set(int pnode, struct pnmask *dstp)
-{
- __set_bit(pnode, &dstp->bits[0]);
-}
-static inline void bau_uvhubs_clear(struct pnmask *dstp,
- int nbits)
-{
- bitmap_zero(&dstp->bits[0], nbits);
-}
-static inline int bau_uvhub_weight(struct pnmask *dstp)
-{
- return bitmap_weight((unsigned long *)&dstp->bits[0],
- UV_DISTRIBUTION_SIZE);
-}
-
-static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
-{
- bitmap_zero(&dstp->bits, nbits);
-}
-
-struct atomic_short {
- short counter;
-};
-
-/*
- * atomic_read_short - read a short atomic variable
- * @v: pointer of type atomic_short
- *
- * Atomically reads the value of @v.
- */
-static inline int atomic_read_short(const struct atomic_short *v)
-{
- return v->counter;
-}
-
-/*
- * atom_asr - add and return a short int
- * @i: short value to add
- * @v: pointer of type atomic_short
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static inline int atom_asr(short i, struct atomic_short *v)
-{
- short __i = i;
- asm volatile(LOCK_PREFIX "xaddw %0, %1"
- : "+r" (i), "+m" (v->counter)
- : : "memory");
- return i + __i;
-}
-
-/*
- * conditionally add 1 to *v, unless *v is >= u
- * return 0 if we cannot add 1 to *v because it is >= u
- * return 1 if we can add 1 to *v because it is < u
- * the add is atomic
- *
- * This is close to atomic_add_unless(), but this allows the 'u' value
- * to be lowered below the current 'v'. atomic_add_unless can only stop
- * on equal.
- */
-static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
-{
- spin_lock(lock);
- if (atomic_read(v) >= u) {
- spin_unlock(lock);
- return 0;
- }
- atomic_inc(v);
- spin_unlock(lock);
- return 1;
-}
-
-void uv_bau_message_interrupt(struct pt_regs *regs);
-
-#endif /* _ASM_X86_UV_UV_BAU_H */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 100d66806503..5002f52be332 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,6 +5,7 @@
*
* SGI UV architectural definitions
*
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
*/
@@ -129,17 +130,6 @@
*/
#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
-/* System Controller Interface Reg info */
-struct uv_scir_s {
- struct timer_list timer;
- unsigned long offset;
- unsigned long last;
- unsigned long idle_on;
- unsigned long idle_off;
- unsigned char state;
- unsigned char enabled;
-};
-
/* GAM (globally addressed memory) range table */
struct uv_gam_range_s {
u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */
@@ -155,6 +145,8 @@ struct uv_gam_range_s {
* available in the L3 cache on the cpu socket for the node.
*/
struct uv_hub_info_s {
+ unsigned int hub_type;
+ unsigned char hub_revision;
unsigned long global_mmr_base;
unsigned long global_mmr_shift;
unsigned long gpa_mask;
@@ -167,9 +159,9 @@ struct uv_hub_info_s {
unsigned char m_val;
unsigned char n_val;
unsigned char gr_table_len;
- unsigned char hub_revision;
unsigned char apic_pnode_shift;
unsigned char gpa_shift;
+ unsigned char nasid_shift;
unsigned char m_shift;
unsigned char n_lshift;
unsigned int gnode_extra;
@@ -191,16 +183,13 @@ struct uv_hub_info_s {
struct uv_cpu_info_s {
void *p_uv_hub_info;
unsigned char blade_cpu_id;
- struct uv_scir_s scir;
+ void *reserved;
};
DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
#define uv_cpu_info this_cpu_ptr(&__uv_cpu_info)
#define uv_cpu_info_per(cpu) (&per_cpu(__uv_cpu_info, cpu))
-#define uv_scir_info (&uv_cpu_info->scir)
-#define uv_cpu_scir_info(cpu) (&uv_cpu_info_per(cpu)->scir)
-
/* Node specific hub common info struct */
extern void **__uv_hub_info_list;
static inline struct uv_hub_info_s *uv_hub_info_list(int node)
@@ -219,6 +208,17 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info;
}
+static inline int uv_hub_type(void)
+{
+ return uv_hub_info->hub_type;
+}
+
+static inline __init void uv_hub_type_set(int uvmask)
+{
+ uv_hub_info->hub_type = uvmask;
+}
+
+
/*
* HUB revision ranges for each UV HUB architecture.
* This is a software convention - NOT the hardware revision numbers in
@@ -228,39 +228,31 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
#define UV3_HUB_REVISION_BASE 5
#define UV4_HUB_REVISION_BASE 7
#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */
+#define UV5_HUB_REVISION_BASE 9
-static inline int is_uv2_hub(void)
-{
- return is_uv_hubbed(uv(2));
-}
-
-static inline int is_uv3_hub(void)
-{
- return is_uv_hubbed(uv(3));
-}
+static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; }
+static inline int is_uv1_hub(void) { return 0; }
+static inline int is_uv2_hub(void) { return is_uv(UV2); }
+static inline int is_uv3_hub(void) { return is_uv(UV3); }
+static inline int is_uv4a_hub(void) { return is_uv(UV4A); }
+static inline int is_uv4_hub(void) { return is_uv(UV4); }
+static inline int is_uv5_hub(void) { return is_uv(UV5); }
-/* First test "is UV4A", then "is UV4" */
-static inline int is_uv4a_hub(void)
-{
- if (is_uv_hubbed(uv(4)))
- return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE);
- return 0;
-}
+/*
+ * UV4A is a revision of UV4. So on UV4A, both is_uv4_hub() and
+ * is_uv4a_hub() return true, While on UV4, only is_uv4_hub()
+ * returns true. So to get true results, first test if is UV4A,
+ * then test if is UV4.
+ */
-static inline int is_uv4_hub(void)
-{
- return is_uv_hubbed(uv(4));
-}
+/* UVX class: UV2,3,4 */
+static inline int is_uvx_hub(void) { return is_uv(UVX); }
-static inline int is_uvx_hub(void)
-{
- return (is_uv_hubbed(-2) >= uv(2));
-}
+/* UVY class: UV5,..? */
+static inline int is_uvy_hub(void) { return is_uv(UVY); }
-static inline int is_uv_hub(void)
-{
- return is_uvx_hub();
-}
+/* Any UV Hubbed System */
+static inline int is_uv_hub(void) { return is_uv(UV_ANY); }
union uvh_apicid {
unsigned long v;
@@ -282,9 +274,11 @@ union uvh_apicid {
* g - GNODE (full 15-bit global nasid, right shifted 1)
* p - PNODE (local part of nsids, right shifted 1)
*/
-#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_NASID_TO_PNODE(n) \
+ (((n) >> uv_hub_info->nasid_shift) & uv_hub_info->pnode_mask)
#define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra)
-#define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1)
+#define UV_PNODE_TO_NASID(p) \
+ (UV_PNODE_TO_GNODE(p) << uv_hub_info->nasid_shift)
#define UV2_LOCAL_MMR_BASE 0xfa000000UL
#define UV2_GLOBAL_MMR32_BASE 0xfc000000UL
@@ -297,29 +291,42 @@ union uvh_apicid {
#define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
#define UV4_LOCAL_MMR_BASE 0xfa000000UL
-#define UV4_GLOBAL_MMR32_BASE 0xfc000000UL
+#define UV4_GLOBAL_MMR32_BASE 0
#define UV4_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
-#define UV4_GLOBAL_MMR32_SIZE (16UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE 0
+
+#define UV5_LOCAL_MMR_BASE 0xfa000000UL
+#define UV5_GLOBAL_MMR32_BASE 0
+#define UV5_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
+#define UV5_GLOBAL_MMR32_SIZE 0
#define UV_LOCAL_MMR_BASE ( \
- is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
- is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
- /*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+ is_uv(UV2) ? UV2_LOCAL_MMR_BASE : \
+ is_uv(UV3) ? UV3_LOCAL_MMR_BASE : \
+ is_uv(UV4) ? UV4_LOCAL_MMR_BASE : \
+ is_uv(UV5) ? UV5_LOCAL_MMR_BASE : \
+ 0)
#define UV_GLOBAL_MMR32_BASE ( \
- is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
- is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
- /*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+ is_uv(UV2) ? UV2_GLOBAL_MMR32_BASE : \
+ is_uv(UV3) ? UV3_GLOBAL_MMR32_BASE : \
+ is_uv(UV4) ? UV4_GLOBAL_MMR32_BASE : \
+ is_uv(UV5) ? UV5_GLOBAL_MMR32_BASE : \
+ 0)
#define UV_LOCAL_MMR_SIZE ( \
- is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
- is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
- /*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+ is_uv(UV2) ? UV2_LOCAL_MMR_SIZE : \
+ is_uv(UV3) ? UV3_LOCAL_MMR_SIZE : \
+ is_uv(UV4) ? UV4_LOCAL_MMR_SIZE : \
+ is_uv(UV5) ? UV5_LOCAL_MMR_SIZE : \
+ 0)
#define UV_GLOBAL_MMR32_SIZE ( \
- is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \
- is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \
- /*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE)
+ is_uv(UV2) ? UV2_GLOBAL_MMR32_SIZE : \
+ is_uv(UV3) ? UV3_GLOBAL_MMR32_SIZE : \
+ is_uv(UV4) ? UV4_GLOBAL_MMR32_SIZE : \
+ is_uv(UV5) ? UV5_GLOBAL_MMR32_SIZE : \
+ 0)
#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base)
@@ -720,7 +727,7 @@ extern void uv_nmi_setup_hubless(void);
#define UVH_TSC_SYNC_SHIFT_UV2K 16 /* UV2/3k have different bits */
#define UVH_TSC_SYNC_MASK 3 /* 0011 */
#define UVH_TSC_SYNC_VALID 3 /* 0011 */
-#define UVH_TSC_SYNC_INVALID 2 /* 0010 */
+#define UVH_TSC_SYNC_UNKNOWN 0 /* 0000 */
/* BMC sets a bit this MMR non-zero before sending an NMI */
#define UVH_NMI_MMR UVH_BIOS_KERNEL_MMR
@@ -728,19 +735,6 @@ extern void uv_nmi_setup_hubless(void);
#define UVH_NMI_MMR_SHIFT 63
#define UVH_NMI_MMR_TYPE "SCRATCH5"
-/* Newer SMM NMI handler, not present in all systems */
-#define UVH_NMI_MMRX UVH_EVENT_OCCURRED0
-#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
-#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
-
-/* Non-zero indicates newer SMM NMI handler present */
-#define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST
-
-/* Indicates to BIOS that we want to use the newer SMM NMI handler */
-#define UVH_NMI_MMRX_REQ UVH_BIOS_KERNEL_MMR_ALIAS_2
-#define UVH_NMI_MMRX_REQ_SHIFT 62
-
struct uv_hub_nmi_s {
raw_spinlock_t nmi_lock;
atomic_t in_nmi; /* flag this node in UV NMI IRQ */
@@ -772,29 +766,6 @@ DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
#define UV_NMI_STATE_DUMP 2
#define UV_NMI_STATE_DUMP_DONE 3
-/* Update SCIR state */
-static inline void uv_set_scir_bits(unsigned char value)
-{
- if (uv_scir_info->state != value) {
- uv_scir_info->state = value;
- uv_write_local_mmr8(uv_scir_info->offset, value);
- }
-}
-
-static inline unsigned long uv_scir_offset(int apicid)
-{
- return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f);
-}
-
-static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
-{
- if (uv_cpu_scir_info(cpu)->state != value) {
- uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
- uv_cpu_scir_info(cpu)->offset, value);
- uv_cpu_scir_info(cpu)->state = value;
- }
-}
-
/*
* Get the minimum revision number of the hub chips within the partition.
* (See UVx_HUB_REVISION_BASE above for specific values.)
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 775bf143a072..57fa67373262 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3,8 +3,9 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * SGI UV MMR definitions
+ * HPE UV MMR definitions
*
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
*/
@@ -18,42 +19,43 @@
* grouped by architecture types.
*
* UVH - definitions common to all UV hub types.
- * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4).
- * UV2H - definitions specific to UV type 2 hub.
- * UV3H - definitions specific to UV type 3 hub.
+ * UVXH - definitions common to UVX class (2, 3, 4).
+ * UVYH - definitions common to UVY class (5).
+ * UV5H - definitions specific to UV type 5 hub.
+ * UV4AH - definitions specific to UV type 4A hub.
* UV4H - definitions specific to UV type 4 hub.
- *
- * So in general, MMR addresses and structures are identical on all hubs types.
- * These MMRs are identified as:
- * #define UVH_xxx <address>
- * union uvh_xxx {
- * unsigned long v;
- * struct uvh_int_cmpd_s {
- * } s;
- * };
+ * UV3H - definitions specific to UV type 3 hub.
+ * UV2H - definitions specific to UV type 2 hub.
*
* If the MMR exists on all hub types but have different addresses,
- * use a conditional operator to define the value at runtime.
- * #define UV2Hxxx b
- * #define UV3Hxxx c
- * #define UV4Hxxx d
- * #define UV4AHxxx e
- * #define UVHxxx (is_uv2_hub() ? UV2Hxxx :
- * (is_uv3_hub() ? UV3Hxxx :
- * (is_uv4a_hub() ? UV4AHxxx :
- * UV4Hxxx))
+ * use a conditional operator to define the value at runtime. Any
+ * that are not defined are blank.
+ * (UV4A variations only generated if different from uv4)
+ * #define UVHxxx (
+ * is_uv(UV5) ? UV5Hxxx value :
+ * is_uv(UV4A) ? UV4AHxxx value :
+ * is_uv(UV4) ? UV4Hxxx value :
+ * is_uv(UV3) ? UV3Hxxx value :
+ * is_uv(UV2) ? UV2Hxxx value :
+ * <ucv> or <undef value>)
+ *
+ * Class UVX has UVs (2|3|4|4A).
+ * Class UVY has UVs (5).
*
* union uvh_xxx {
* unsigned long v;
* struct uvh_xxx_s { # Common fields only
* } s;
- * struct uv2h_xxx_s { # Full UV2 definition (*)
- * } s2;
- * struct uv3h_xxx_s { # Full UV3 definition (*)
- * } s3;
- * (NOTE: No struct uv4ah_xxx_s members exist)
+ * struct uv5h_xxx_s { # Full UV5 definition (*)
+ * } s5;
+ * struct uv4ah_xxx_s { # Full UV4A definition (*)
+ * } s4a;
* struct uv4h_xxx_s { # Full UV4 definition (*)
* } s4;
+ * struct uv3h_xxx_s { # Full UV3 definition (*)
+ * } s3;
+ * struct uv2h_xxx_s { # Full UV2 definition (*)
+ * } s2;
* };
* (* - if present and different than the common struct)
*
@@ -62,429 +64,499 @@
* if the contents is the same for all hubs, only the "s" structure is
* generated.
*
- * If the MMR exists on ONLY 1 type of hub, no generic definition is
- * generated:
- * #define UVnH_xxx <uvn address>
- * union uvnh_xxx {
- * unsigned long v;
- * struct uvh_int_cmpd_s {
- * } sn;
- * };
- *
- * (GEN Flags: mflags_opt= undefs=function UV234=UVXH)
+ * (GEN Flags: undefs=function)
*/
+ /* UV bit masks */
+#define UV2 (1 << 0)
+#define UV3 (1 << 1)
+#define UV4 (1 << 2)
+#define UV4A (1 << 3)
+#define UV5 (1 << 4)
+#define UVX (UV2|UV3|UV4)
+#define UVY (UV5)
+#define UV_ANY (~0)
+
+
+
+
#define UV_MMR_ENABLE (1UL << 63)
+#define UV1_HUB_PART_NUMBER 0x88a5
#define UV2_HUB_PART_NUMBER 0x8eb8
#define UV2_HUB_PART_NUMBER_X 0x1111
#define UV3_HUB_PART_NUMBER 0x9578
#define UV3_HUB_PART_NUMBER_X 0x4321
#define UV4_HUB_PART_NUMBER 0x99a1
+#define UV5_HUB_PART_NUMBER 0xa171
/* Error function to catch undefined references */
extern unsigned long uv_undefined(char *str);
/* ========================================================================= */
-/* UVH_BAU_DATA_BROADCAST */
-/* ========================================================================= */
-#define UVH_BAU_DATA_BROADCAST 0x61688UL
-
-#define UV2H_BAU_DATA_BROADCAST_32 0x440
-#define UV3H_BAU_DATA_BROADCAST_32 0x440
-#define UV4H_BAU_DATA_BROADCAST_32 0x360
-#define UVH_BAU_DATA_BROADCAST_32 ( \
- is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 : \
- is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 : \
- /*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32)
-
-#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0
-#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL
-
-
-union uvh_bau_data_broadcast_u {
- unsigned long v;
- struct uvh_bau_data_broadcast_s {
- unsigned long enable:1; /* RW */
- unsigned long rsvd_1_63:63;
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_BAU_DATA_CONFIG */
-/* ========================================================================= */
-#define UVH_BAU_DATA_CONFIG 0x61680UL
-
-#define UV2H_BAU_DATA_CONFIG_32 0x438
-#define UV3H_BAU_DATA_CONFIG_32 0x438
-#define UV4H_BAU_DATA_CONFIG_32 0x358
-#define UVH_BAU_DATA_CONFIG_32 ( \
- is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 : \
- is_uv3_hub() ? UV3H_BAU_DATA_CONFIG_32 : \
- /*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32)
-
-#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
-#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
-#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11
-#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12
-#define UVH_BAU_DATA_CONFIG_P_SHFT 13
-#define UVH_BAU_DATA_CONFIG_T_SHFT 15
-#define UVH_BAU_DATA_CONFIG_M_SHFT 16
-#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32
-#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
-
-
-union uvh_bau_data_config_u {
- unsigned long v;
- struct uvh_bau_data_config_s {
- unsigned long vector_:8; /* RW */
- unsigned long dm:3; /* RW */
- unsigned long destmode:1; /* RW */
- unsigned long status:1; /* RO */
- unsigned long p:1; /* RO */
- unsigned long rsvd_14:1;
- unsigned long t:1; /* RO */
- unsigned long m:1; /* RW */
- unsigned long rsvd_17_31:15;
- unsigned long apic_id:32; /* RW */
- } s;
-};
-
-/* ========================================================================= */
/* UVH_EVENT_OCCURRED0 */
/* ========================================================================= */
#define UVH_EVENT_OCCURRED0 0x70000UL
-#define UVH_EVENT_OCCURRED0_32 0x5e8
+/* UVH common defines*/
#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+/* UVXH common defines */
#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2
-#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
-#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
-#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
-#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
-#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
-#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
-#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
-#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
-#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
-#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
-#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
-#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
+#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
+#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
+#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5
#define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL
+#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6
#define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL
+#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
+#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
+#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
+#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
+#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
+#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
+#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
+#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
+#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
+#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
-#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
-#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
-
-#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
-#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
-#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
-#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
-#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
-#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
-#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
-#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
-#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
-#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
-#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
-#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
-#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
-#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
-#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
-#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
-#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
-#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
-#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
-#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
-#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
-#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
-#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
-#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
-#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
-#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
-#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
-#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
-#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
-#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
-#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
-#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
-#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
-#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
-#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
-#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
-#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
-#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
-#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
-#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
-
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED0_KT_HCERR_SHFT 1
+#define UVYH_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED0_RH0_HCERR_SHFT 2
+#define UVYH_EVENT_OCCURRED0_RH0_HCERR_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED0_RH1_HCERR_SHFT 3
+#define UVYH_EVENT_OCCURRED0_RH1_HCERR_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED0_LH0_HCERR_SHFT 4
+#define UVYH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_SHFT 5
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED0_LH2_HCERR_SHFT 6
+#define UVYH_EVENT_OCCURRED0_LH2_HCERR_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED0_LH3_HCERR_SHFT 7
+#define UVYH_EVENT_OCCURRED0_LH3_HCERR_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED0_XB_HCERR_SHFT 8
+#define UVYH_EVENT_OCCURRED0_XB_HCERR_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED0_RDM_HCERR_SHFT 9
+#define UVYH_EVENT_OCCURRED0_RDM_HCERR_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED0_NI0_HCERR_SHFT 10
+#define UVYH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED0_NI1_HCERR_SHFT 11
+#define UVYH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED0_LB_AOERR0_SHFT 12
+#define UVYH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED0_KT_AOERR0_SHFT 13
+#define UVYH_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_SHFT 14
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_SHFT 15
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 16
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 17
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_SHFT 18
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_SHFT 19
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED0_XB_AOERR0_SHFT 20
+#define UVYH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_SHFT 21
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_SHFT 22
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_SHFT 23
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 24
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 25
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED0_LB_AOERR1_SHFT 26
+#define UVYH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED0_KT_AOERR1_SHFT 27
+#define UVYH_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_SHFT 28
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_SHFT 29
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 30
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 31
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_SHFT 32
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_SHFT 33
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_MASK 0x0000000200000000UL
+#define UVYH_EVENT_OCCURRED0_XB_AOERR1_SHFT 34
+#define UVYH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000400000000UL
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_SHFT 35
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_MASK 0x0000000800000000UL
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_SHFT 36
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_MASK 0x0000001000000000UL
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_SHFT 37
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_MASK 0x0000002000000000UL
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 38
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000004000000000UL
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 39
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000008000000000UL
+#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 40
+#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000010000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 41
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000020000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 42
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000040000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 43
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000080000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 44
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000100000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 45
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000200000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 46
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000400000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 47
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000800000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 48
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0001000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 49
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0002000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 50
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0004000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 51
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0008000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 52
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0010000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 53
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0020000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 54
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0040000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 55
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0080000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 56
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0100000000000000UL
+#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 57
+#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0200000000000000UL
+#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 58
+#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0400000000000000UL
+#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 59
+#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0800000000000000UL
+#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 60
+#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x1000000000000000UL
+#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 61
+#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x2000000000000000UL
+
+/* UV4 unique defines */
#define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT 1
-#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10
-#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17
-#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18
-#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19
-#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20
-#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21
-#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22
-#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23
-#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24
-#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25
-#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26
-#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27
-#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28
-#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29
-#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30
-#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31
-#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32
-#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33
-#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34
-#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35
-#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36
-#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53
-#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54
-#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55
-#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56
-#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57
-#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58
-#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63
#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10
#define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17
#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18
#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19
#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20
#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21
#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22
#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23
#define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24
#define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25
#define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26
#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27
#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28
#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29
#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30
#define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31
#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32
#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33
#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34
#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35
#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36
#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37
#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0004000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0008000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0010000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53
#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0020000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54
#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0040000000000000UL
+#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55
#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0080000000000000UL
+#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56
#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0100000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57
#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0200000000000000UL
+#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58
#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0400000000000000UL
+#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59
#define UV4H_EVENT_OCCURRED0_IPI_INT_MASK 0x0800000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60
#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x1000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61
#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x2000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62
#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x4000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63
#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x8000000000000000UL
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK ( \
+ is_uv(UV4) ? 0x1000000000000000UL : \
+ is_uv(UV3) ? 0x0040000000000000UL : \
+ is_uv(UV2) ? 0x0040000000000000UL : \
+ 0)
#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT ( \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
+ is_uv(UV4) ? 60 : \
+ is_uv(UV3) ? 54 : \
+ is_uv(UV2) ? 54 : \
+ -1)
union uvh_event_occurred0_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_event_occurred0_s {
- unsigned long lb_hcerr:1; /* RW, W1C */
- unsigned long rsvd_1_10:10;
- unsigned long rh_aoerr0:1; /* RW, W1C */
- unsigned long rsvd_12_63:52;
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long rsvd_1_63:63;
} s;
+
+ /* UVXH common struct */
struct uvxh_event_occurred0_s {
unsigned long lb_hcerr:1; /* RW */
unsigned long rsvd_1:1;
@@ -505,6 +577,142 @@ union uvh_event_occurred0_u {
unsigned long xb_aoerr0:1; /* RW */
unsigned long rsvd_17_63:47;
} sx;
+
+ /* UVYH common struct */
+ struct uvyh_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long kt_hcerr:1; /* RW */
+ unsigned long rh0_hcerr:1; /* RW */
+ unsigned long rh1_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long lh2_hcerr:1; /* RW */
+ unsigned long lh3_hcerr:1; /* RW */
+ unsigned long xb_hcerr:1; /* RW */
+ unsigned long rdm_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long kt_aoerr0:1; /* RW */
+ unsigned long rh0_aoerr0:1; /* RW */
+ unsigned long rh1_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long lh2_aoerr0:1; /* RW */
+ unsigned long lh3_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rdm_aoerr0:1; /* RW */
+ unsigned long rt0_aoerr0:1; /* RW */
+ unsigned long rt1_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long kt_aoerr1:1; /* RW */
+ unsigned long rh0_aoerr1:1; /* RW */
+ unsigned long rh1_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long lh2_aoerr1:1; /* RW */
+ unsigned long lh3_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rdm_aoerr1:1; /* RW */
+ unsigned long rt0_aoerr1:1; /* RW */
+ unsigned long rt1_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long rsvd_62_63:2;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long kt_hcerr:1; /* RW */
+ unsigned long rh0_hcerr:1; /* RW */
+ unsigned long rh1_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long lh2_hcerr:1; /* RW */
+ unsigned long lh3_hcerr:1; /* RW */
+ unsigned long xb_hcerr:1; /* RW */
+ unsigned long rdm_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long kt_aoerr0:1; /* RW */
+ unsigned long rh0_aoerr0:1; /* RW */
+ unsigned long rh1_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long lh2_aoerr0:1; /* RW */
+ unsigned long lh3_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rdm_aoerr0:1; /* RW */
+ unsigned long rt0_aoerr0:1; /* RW */
+ unsigned long rt1_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long kt_aoerr1:1; /* RW */
+ unsigned long rh0_aoerr1:1; /* RW */
+ unsigned long rh1_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long lh2_aoerr1:1; /* RW */
+ unsigned long lh3_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rdm_aoerr1:1; /* RW */
+ unsigned long rt0_aoerr1:1; /* RW */
+ unsigned long rt1_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long rsvd_62_63:2;
+ } s5;
+
+ /* UV4 unique struct */
struct uv4h_event_occurred0_s {
unsigned long lb_hcerr:1; /* RW */
unsigned long kt_hcerr:1; /* RW */
@@ -571,13 +779,1355 @@ union uvh_event_occurred0_u {
unsigned long extio_int2:1; /* RW */
unsigned long extio_int3:1; /* RW */
} s4;
+
+ /* UV3 unique struct */
+ struct uv3h_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long qp_hcerr:1; /* RW */
+ unsigned long rh_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long gr0_hcerr:1; /* RW */
+ unsigned long gr1_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long qp_aoerr0:1; /* RW */
+ unsigned long rh_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long gr0_aoerr0:1; /* RW */
+ unsigned long gr1_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rt_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long qp_aoerr1:1; /* RW */
+ unsigned long rh_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long gr0_aoerr1:1; /* RW */
+ unsigned long gr1_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rt_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long rsvd_59_63:5;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long qp_hcerr:1; /* RW */
+ unsigned long rh_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long gr0_hcerr:1; /* RW */
+ unsigned long gr1_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long qp_aoerr0:1; /* RW */
+ unsigned long rh_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long gr0_aoerr0:1; /* RW */
+ unsigned long gr1_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rt_aoerr0:1; /* RW */
+ unsigned long ni0_aoerr0:1; /* RW */
+ unsigned long ni1_aoerr0:1; /* RW */
+ unsigned long lb_aoerr1:1; /* RW */
+ unsigned long qp_aoerr1:1; /* RW */
+ unsigned long rh_aoerr1:1; /* RW */
+ unsigned long lh0_aoerr1:1; /* RW */
+ unsigned long lh1_aoerr1:1; /* RW */
+ unsigned long gr0_aoerr1:1; /* RW */
+ unsigned long gr1_aoerr1:1; /* RW */
+ unsigned long xb_aoerr1:1; /* RW */
+ unsigned long rt_aoerr1:1; /* RW */
+ unsigned long ni0_aoerr1:1; /* RW */
+ unsigned long ni1_aoerr1:1; /* RW */
+ unsigned long system_shutdown_int:1; /* RW */
+ unsigned long lb_irq_int_0:1; /* RW */
+ unsigned long lb_irq_int_1:1; /* RW */
+ unsigned long lb_irq_int_2:1; /* RW */
+ unsigned long lb_irq_int_3:1; /* RW */
+ unsigned long lb_irq_int_4:1; /* RW */
+ unsigned long lb_irq_int_5:1; /* RW */
+ unsigned long lb_irq_int_6:1; /* RW */
+ unsigned long lb_irq_int_7:1; /* RW */
+ unsigned long lb_irq_int_8:1; /* RW */
+ unsigned long lb_irq_int_9:1; /* RW */
+ unsigned long lb_irq_int_10:1; /* RW */
+ unsigned long lb_irq_int_11:1; /* RW */
+ unsigned long lb_irq_int_12:1; /* RW */
+ unsigned long lb_irq_int_13:1; /* RW */
+ unsigned long lb_irq_int_14:1; /* RW */
+ unsigned long lb_irq_int_15:1; /* RW */
+ unsigned long l1_nmi_int:1; /* RW */
+ unsigned long stop_clock:1; /* RW */
+ unsigned long asic_to_l1:1; /* RW */
+ unsigned long l1_to_asic:1; /* RW */
+ unsigned long la_seq_trigger:1; /* RW */
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long rsvd_59_63:5;
+ } s2;
};
/* ========================================================================= */
/* UVH_EVENT_OCCURRED0_ALIAS */
/* ========================================================================= */
#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL
-#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
+
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED1 */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED1 0x70080UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED1_IPI_INT_SHFT 0
+#define UVYH_EVENT_OCCURRED1_IPI_INT_MASK 0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_SHFT 1
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_SHFT 2
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_SHFT 3
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_SHFT 4
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED1_PROFILE_INT_SHFT 5
+#define UVYH_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED1_BAU_DATA_SHFT 6
+#define UVYH_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_SHFT 7
+#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_SHFT 8
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_SHFT 9
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_SHFT 10
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_SHFT 11
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_SHFT 12
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_SHFT 13
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_SHFT 14
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_SHFT 15
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_SHFT 16
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_SHFT 17
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_SHFT 18
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_SHFT 19
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_SHFT 20
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_SHFT 21
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_SHFT 22
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_SHFT 23
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_SHFT 24
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_SHFT 25
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_SHFT 26
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_SHFT 27
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_SHFT 28
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_SHFT 29
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_SHFT 30
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_SHFT 31
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_SHFT 32
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_SHFT 33
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_MASK 0x0000000200000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_SHFT 34
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_MASK 0x0000000400000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_SHFT 35
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_MASK 0x0000000800000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_SHFT 36
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_MASK 0x0000001000000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_SHFT 37
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_MASK 0x0000002000000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED1_PROFILE_INT_SHFT 0
+#define UV4H_EVENT_OCCURRED1_PROFILE_INT_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED1_BAU_DATA_SHFT 1
+#define UV4H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_SHFT 2
+#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 3
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 4
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 5
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 6
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 7
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 8
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 9
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 10
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 11
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 12
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 13
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 14
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 15
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 16
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 17
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 18
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_SHFT 19
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_SHFT 20
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_SHFT 21
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_SHFT 22
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_SHFT 23
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_SHFT 24
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_SHFT 25
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_SHFT 26
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 27
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 28
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 29
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 30
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 31
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 32
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 33
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 34
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 35
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 36
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 37
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 38
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 39
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 40
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 41
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 42
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_SHFT 43
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_SHFT 44
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_SHFT 45
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_SHFT 46
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_SHFT 47
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_SHFT 48
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_SHFT 49
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_MASK 0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_SHFT 50
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_MASK 0x0004000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED1_BAU_DATA_SHFT 0
+#define UV3H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1
+#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_SHFT 0
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT 1
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT 18
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT 19
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT 20
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT 21
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT 22
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT 23
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT 24
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT 25
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT 26
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT 27
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT 28
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT 29
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT 30
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT 31
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT 32
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT 33
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT 34
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT 35
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT 36
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT 37
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT 38
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT 39
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT 40
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT 41
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT 42
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT 43
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT 44
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT 45
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT 46
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT 47
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT 48
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT 49
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT 50
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT 51
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK 0x0008000000000000UL
+
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK ( \
+ is_uv(UV5) ? 0x0000000000000002UL : \
+ 0)
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT ( \
+ is_uv(UV5) ? 1 : \
+ -1)
+
+union uvyh_event_occurred1_u {
+ unsigned long v;
+
+ /* UVYH common struct */
+ struct uvyh_event_occurred1_s {
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long bau_data:1; /* RW */
+ unsigned long proc_general:1; /* RW */
+ unsigned long xh_tlb_int0:1; /* RW */
+ unsigned long xh_tlb_int1:1; /* RW */
+ unsigned long xh_tlb_int2:1; /* RW */
+ unsigned long xh_tlb_int3:1; /* RW */
+ unsigned long xh_tlb_int4:1; /* RW */
+ unsigned long xh_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int0:1; /* RW */
+ unsigned long rdm_tlb_int1:1; /* RW */
+ unsigned long rdm_tlb_int2:1; /* RW */
+ unsigned long rdm_tlb_int3:1; /* RW */
+ unsigned long rdm_tlb_int4:1; /* RW */
+ unsigned long rdm_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int6:1; /* RW */
+ unsigned long rdm_tlb_int7:1; /* RW */
+ unsigned long rdm_tlb_int8:1; /* RW */
+ unsigned long rdm_tlb_int9:1; /* RW */
+ unsigned long rdm_tlb_int10:1; /* RW */
+ unsigned long rdm_tlb_int11:1; /* RW */
+ unsigned long rdm_tlb_int12:1; /* RW */
+ unsigned long rdm_tlb_int13:1; /* RW */
+ unsigned long rdm_tlb_int14:1; /* RW */
+ unsigned long rdm_tlb_int15:1; /* RW */
+ unsigned long rdm_tlb_int16:1; /* RW */
+ unsigned long rdm_tlb_int17:1; /* RW */
+ unsigned long rdm_tlb_int18:1; /* RW */
+ unsigned long rdm_tlb_int19:1; /* RW */
+ unsigned long rdm_tlb_int20:1; /* RW */
+ unsigned long rdm_tlb_int21:1; /* RW */
+ unsigned long rdm_tlb_int22:1; /* RW */
+ unsigned long rdm_tlb_int23:1; /* RW */
+ unsigned long rsvd_38_63:26;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_event_occurred1_s {
+ unsigned long ipi_int:1; /* RW */
+ unsigned long extio_int0:1; /* RW */
+ unsigned long extio_int1:1; /* RW */
+ unsigned long extio_int2:1; /* RW */
+ unsigned long extio_int3:1; /* RW */
+ unsigned long profile_int:1; /* RW */
+ unsigned long bau_data:1; /* RW */
+ unsigned long proc_general:1; /* RW */
+ unsigned long xh_tlb_int0:1; /* RW */
+ unsigned long xh_tlb_int1:1; /* RW */
+ unsigned long xh_tlb_int2:1; /* RW */
+ unsigned long xh_tlb_int3:1; /* RW */
+ unsigned long xh_tlb_int4:1; /* RW */
+ unsigned long xh_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int0:1; /* RW */
+ unsigned long rdm_tlb_int1:1; /* RW */
+ unsigned long rdm_tlb_int2:1; /* RW */
+ unsigned long rdm_tlb_int3:1; /* RW */
+ unsigned long rdm_tlb_int4:1; /* RW */
+ unsigned long rdm_tlb_int5:1; /* RW */
+ unsigned long rdm_tlb_int6:1; /* RW */
+ unsigned long rdm_tlb_int7:1; /* RW */
+ unsigned long rdm_tlb_int8:1; /* RW */
+ unsigned long rdm_tlb_int9:1; /* RW */
+ unsigned long rdm_tlb_int10:1; /* RW */
+ unsigned long rdm_tlb_int11:1; /* RW */
+ unsigned long rdm_tlb_int12:1; /* RW */
+ unsigned long rdm_tlb_int13:1; /* RW */
+ unsigned long rdm_tlb_int14:1; /* RW */
+ unsigned long rdm_tlb_int15:1; /* RW */
+ unsigned long rdm_tlb_int16:1; /* RW */
+ unsigned long rdm_tlb_int17:1; /* RW */
+ unsigned long rdm_tlb_int18:1; /* RW */
+ unsigned long rdm_tlb_int19:1; /* RW */
+ unsigned long rdm_tlb_int20:1; /* RW */
+ unsigned long rdm_tlb_int21:1; /* RW */
+ unsigned long rdm_tlb_int22:1; /* RW */
+ unsigned long rdm_tlb_int23:1; /* RW */
+ unsigned long rsvd_38_63:26;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_event_occurred1_s {
+ unsigned long profile_int:1; /* RW */
+ unsigned long bau_data:1; /* RW */
+ unsigned long proc_general:1; /* RW */
+ unsigned long gr0_tlb_int0:1; /* RW */
+ unsigned long gr0_tlb_int1:1; /* RW */
+ unsigned long gr0_tlb_int2:1; /* RW */
+ unsigned long gr0_tlb_int3:1; /* RW */
+ unsigned long gr0_tlb_int4:1; /* RW */
+ unsigned long gr0_tlb_int5:1; /* RW */
+ unsigned long gr0_tlb_int6:1; /* RW */
+ unsigned long gr0_tlb_int7:1; /* RW */
+ unsigned long gr0_tlb_int8:1; /* RW */
+ unsigned long gr0_tlb_int9:1; /* RW */
+ unsigned long gr0_tlb_int10:1; /* RW */
+ unsigned long gr0_tlb_int11:1; /* RW */
+ unsigned long gr0_tlb_int12:1; /* RW */
+ unsigned long gr0_tlb_int13:1; /* RW */
+ unsigned long gr0_tlb_int14:1; /* RW */
+ unsigned long gr0_tlb_int15:1; /* RW */
+ unsigned long gr0_tlb_int16:1; /* RW */
+ unsigned long gr0_tlb_int17:1; /* RW */
+ unsigned long gr0_tlb_int18:1; /* RW */
+ unsigned long gr0_tlb_int19:1; /* RW */
+ unsigned long gr0_tlb_int20:1; /* RW */
+ unsigned long gr0_tlb_int21:1; /* RW */
+ unsigned long gr0_tlb_int22:1; /* RW */
+ unsigned long gr0_tlb_int23:1; /* RW */
+ unsigned long gr1_tlb_int0:1; /* RW */
+ unsigned long gr1_tlb_int1:1; /* RW */
+ unsigned long gr1_tlb_int2:1; /* RW */
+ unsigned long gr1_tlb_int3:1; /* RW */
+ unsigned long gr1_tlb_int4:1; /* RW */
+ unsigned long gr1_tlb_int5:1; /* RW */
+ unsigned long gr1_tlb_int6:1; /* RW */
+ unsigned long gr1_tlb_int7:1; /* RW */
+ unsigned long gr1_tlb_int8:1; /* RW */
+ unsigned long gr1_tlb_int9:1; /* RW */
+ unsigned long gr1_tlb_int10:1; /* RW */
+ unsigned long gr1_tlb_int11:1; /* RW */
+ unsigned long gr1_tlb_int12:1; /* RW */
+ unsigned long gr1_tlb_int13:1; /* RW */
+ unsigned long gr1_tlb_int14:1; /* RW */
+ unsigned long gr1_tlb_int15:1; /* RW */
+ unsigned long gr1_tlb_int16:1; /* RW */
+ unsigned long gr1_tlb_int17:1; /* RW */
+ unsigned long gr1_tlb_int18:1; /* RW */
+ unsigned long gr1_tlb_int19:1; /* RW */
+ unsigned long gr1_tlb_int20:1; /* RW */
+ unsigned long gr1_tlb_int21:1; /* RW */
+ unsigned long gr1_tlb_int22:1; /* RW */
+ unsigned long gr1_tlb_int23:1; /* RW */
+ unsigned long rsvd_51_63:13;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_event_occurred1_s {
+ unsigned long bau_data:1; /* RW */
+ unsigned long power_management_req:1; /* RW */
+ unsigned long message_accelerator_int0:1; /* RW */
+ unsigned long message_accelerator_int1:1; /* RW */
+ unsigned long message_accelerator_int2:1; /* RW */
+ unsigned long message_accelerator_int3:1; /* RW */
+ unsigned long message_accelerator_int4:1; /* RW */
+ unsigned long message_accelerator_int5:1; /* RW */
+ unsigned long message_accelerator_int6:1; /* RW */
+ unsigned long message_accelerator_int7:1; /* RW */
+ unsigned long message_accelerator_int8:1; /* RW */
+ unsigned long message_accelerator_int9:1; /* RW */
+ unsigned long message_accelerator_int10:1; /* RW */
+ unsigned long message_accelerator_int11:1; /* RW */
+ unsigned long message_accelerator_int12:1; /* RW */
+ unsigned long message_accelerator_int13:1; /* RW */
+ unsigned long message_accelerator_int14:1; /* RW */
+ unsigned long message_accelerator_int15:1; /* RW */
+ unsigned long gr0_tlb_int0:1; /* RW */
+ unsigned long gr0_tlb_int1:1; /* RW */
+ unsigned long gr0_tlb_int2:1; /* RW */
+ unsigned long gr0_tlb_int3:1; /* RW */
+ unsigned long gr0_tlb_int4:1; /* RW */
+ unsigned long gr0_tlb_int5:1; /* RW */
+ unsigned long gr0_tlb_int6:1; /* RW */
+ unsigned long gr0_tlb_int7:1; /* RW */
+ unsigned long gr0_tlb_int8:1; /* RW */
+ unsigned long gr0_tlb_int9:1; /* RW */
+ unsigned long gr0_tlb_int10:1; /* RW */
+ unsigned long gr0_tlb_int11:1; /* RW */
+ unsigned long gr0_tlb_int12:1; /* RW */
+ unsigned long gr0_tlb_int13:1; /* RW */
+ unsigned long gr0_tlb_int14:1; /* RW */
+ unsigned long gr0_tlb_int15:1; /* RW */
+ unsigned long gr1_tlb_int0:1; /* RW */
+ unsigned long gr1_tlb_int1:1; /* RW */
+ unsigned long gr1_tlb_int2:1; /* RW */
+ unsigned long gr1_tlb_int3:1; /* RW */
+ unsigned long gr1_tlb_int4:1; /* RW */
+ unsigned long gr1_tlb_int5:1; /* RW */
+ unsigned long gr1_tlb_int6:1; /* RW */
+ unsigned long gr1_tlb_int7:1; /* RW */
+ unsigned long gr1_tlb_int8:1; /* RW */
+ unsigned long gr1_tlb_int9:1; /* RW */
+ unsigned long gr1_tlb_int10:1; /* RW */
+ unsigned long gr1_tlb_int11:1; /* RW */
+ unsigned long gr1_tlb_int12:1; /* RW */
+ unsigned long gr1_tlb_int13:1; /* RW */
+ unsigned long gr1_tlb_int14:1; /* RW */
+ unsigned long gr1_tlb_int15:1; /* RW */
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rsvd_52_63:12;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_event_occurred1_s {
+ unsigned long bau_data:1; /* RW */
+ unsigned long power_management_req:1; /* RW */
+ unsigned long message_accelerator_int0:1; /* RW */
+ unsigned long message_accelerator_int1:1; /* RW */
+ unsigned long message_accelerator_int2:1; /* RW */
+ unsigned long message_accelerator_int3:1; /* RW */
+ unsigned long message_accelerator_int4:1; /* RW */
+ unsigned long message_accelerator_int5:1; /* RW */
+ unsigned long message_accelerator_int6:1; /* RW */
+ unsigned long message_accelerator_int7:1; /* RW */
+ unsigned long message_accelerator_int8:1; /* RW */
+ unsigned long message_accelerator_int9:1; /* RW */
+ unsigned long message_accelerator_int10:1; /* RW */
+ unsigned long message_accelerator_int11:1; /* RW */
+ unsigned long message_accelerator_int12:1; /* RW */
+ unsigned long message_accelerator_int13:1; /* RW */
+ unsigned long message_accelerator_int14:1; /* RW */
+ unsigned long message_accelerator_int15:1; /* RW */
+ unsigned long gr0_tlb_int0:1; /* RW */
+ unsigned long gr0_tlb_int1:1; /* RW */
+ unsigned long gr0_tlb_int2:1; /* RW */
+ unsigned long gr0_tlb_int3:1; /* RW */
+ unsigned long gr0_tlb_int4:1; /* RW */
+ unsigned long gr0_tlb_int5:1; /* RW */
+ unsigned long gr0_tlb_int6:1; /* RW */
+ unsigned long gr0_tlb_int7:1; /* RW */
+ unsigned long gr0_tlb_int8:1; /* RW */
+ unsigned long gr0_tlb_int9:1; /* RW */
+ unsigned long gr0_tlb_int10:1; /* RW */
+ unsigned long gr0_tlb_int11:1; /* RW */
+ unsigned long gr0_tlb_int12:1; /* RW */
+ unsigned long gr0_tlb_int13:1; /* RW */
+ unsigned long gr0_tlb_int14:1; /* RW */
+ unsigned long gr0_tlb_int15:1; /* RW */
+ unsigned long gr1_tlb_int0:1; /* RW */
+ unsigned long gr1_tlb_int1:1; /* RW */
+ unsigned long gr1_tlb_int2:1; /* RW */
+ unsigned long gr1_tlb_int3:1; /* RW */
+ unsigned long gr1_tlb_int4:1; /* RW */
+ unsigned long gr1_tlb_int5:1; /* RW */
+ unsigned long gr1_tlb_int6:1; /* RW */
+ unsigned long gr1_tlb_int7:1; /* RW */
+ unsigned long gr1_tlb_int8:1; /* RW */
+ unsigned long gr1_tlb_int9:1; /* RW */
+ unsigned long gr1_tlb_int10:1; /* RW */
+ unsigned long gr1_tlb_int11:1; /* RW */
+ unsigned long gr1_tlb_int12:1; /* RW */
+ unsigned long gr1_tlb_int13:1; /* RW */
+ unsigned long gr1_tlb_int14:1; /* RW */
+ unsigned long gr1_tlb_int15:1; /* RW */
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rsvd_52_63:12;
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED1_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED1_ALIAS 0x70088UL
+
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2 */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2 0x70100UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 0
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 1
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED2_RTC_0_SHFT 2
+#define UVYH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED2_RTC_1_SHFT 3
+#define UVYH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED2_RTC_2_SHFT 4
+#define UVYH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED2_RTC_3_SHFT 5
+#define UVYH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED2_RTC_4_SHFT 6
+#define UVYH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED2_RTC_5_SHFT 7
+#define UVYH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED2_RTC_6_SHFT 8
+#define UVYH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED2_RTC_7_SHFT 9
+#define UVYH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED2_RTC_8_SHFT 10
+#define UVYH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED2_RTC_9_SHFT 11
+#define UVYH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED2_RTC_10_SHFT 12
+#define UVYH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED2_RTC_11_SHFT 13
+#define UVYH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED2_RTC_12_SHFT 14
+#define UVYH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED2_RTC_13_SHFT 15
+#define UVYH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED2_RTC_14_SHFT 16
+#define UVYH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED2_RTC_15_SHFT 17
+#define UVYH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED2_RTC_16_SHFT 18
+#define UVYH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED2_RTC_17_SHFT 19
+#define UVYH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED2_RTC_18_SHFT 20
+#define UVYH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED2_RTC_19_SHFT 21
+#define UVYH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED2_RTC_20_SHFT 22
+#define UVYH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED2_RTC_21_SHFT 23
+#define UVYH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED2_RTC_22_SHFT 24
+#define UVYH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_23_SHFT 25
+#define UVYH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_24_SHFT 26
+#define UVYH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_25_SHFT 27
+#define UVYH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_26_SHFT 28
+#define UVYH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_27_SHFT 29
+#define UVYH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_28_SHFT 30
+#define UVYH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_29_SHFT 31
+#define UVYH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_30_SHFT 32
+#define UVYH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_31_SHFT 33
+#define UVYH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000200000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18
+#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19
+#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20
+#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21
+#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22
+#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23
+#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24
+#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25
+#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26
+#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27
+#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28
+#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29
+#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30
+#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31
+#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32
+#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33
+#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34
+#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35
+#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36
+#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37
+#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38
+#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39
+#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40
+#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41
+#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42
+#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43
+#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44
+#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45
+#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46
+#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47
+#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48
+#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49
+#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+#define UVH_EVENT_OCCURRED2_RTC_1_MASK ( \
+ is_uv(UV5) ? 0x0000000000000008UL : \
+ is_uv(UV4) ? 0x0000000000080000UL : \
+ is_uv(UV3) ? 0x0000000000000002UL : \
+ is_uv(UV2) ? 0x0000000000000002UL : \
+ 0)
+#define UVH_EVENT_OCCURRED2_RTC_1_SHFT ( \
+ is_uv(UV5) ? 3 : \
+ is_uv(UV4) ? 19 : \
+ is_uv(UV3) ? 1 : \
+ is_uv(UV2) ? 1 : \
+ -1)
+
+union uvyh_event_occurred2_u {
+ unsigned long v;
+
+ /* UVYH common struct */
+ struct uvyh_event_occurred2_s {
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_34_63:30;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_event_occurred2_s {
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_34_63:30;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_event_occurred2_s {
+ unsigned long message_accelerator_int0:1; /* RW */
+ unsigned long message_accelerator_int1:1; /* RW */
+ unsigned long message_accelerator_int2:1; /* RW */
+ unsigned long message_accelerator_int3:1; /* RW */
+ unsigned long message_accelerator_int4:1; /* RW */
+ unsigned long message_accelerator_int5:1; /* RW */
+ unsigned long message_accelerator_int6:1; /* RW */
+ unsigned long message_accelerator_int7:1; /* RW */
+ unsigned long message_accelerator_int8:1; /* RW */
+ unsigned long message_accelerator_int9:1; /* RW */
+ unsigned long message_accelerator_int10:1; /* RW */
+ unsigned long message_accelerator_int11:1; /* RW */
+ unsigned long message_accelerator_int12:1; /* RW */
+ unsigned long message_accelerator_int13:1; /* RW */
+ unsigned long message_accelerator_int14:1; /* RW */
+ unsigned long message_accelerator_int15:1; /* RW */
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_50_63:14;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_event_occurred2_s {
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_32_63:32;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_event_occurred2_s {
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_32_63:32;
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_EVENT_OCCURRED2_ALIAS */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2_ALIAS 0x70108UL
/* ========================================================================= */
@@ -585,51 +2135,148 @@ union uvh_event_occurred0_u {
/* ========================================================================= */
#define UVH_EXTIO_INT0_BROADCAST 0x61448UL
-#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV4H_EXTIO_INT0_BROADCAST_32 0x310
-#define UVH_EXTIO_INT0_BROADCAST_32 ( \
- is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 : \
- is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 : \
- /*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32)
-
+/* UVH common defines*/
#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0
#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL
union uvh_extio_int0_broadcast_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_extio_int0_broadcast_s {
unsigned long enable:1; /* RW */
unsigned long rsvd_1_63:63;
} s;
+
+ /* UV5 unique struct */
+ struct uv5h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_GR0_GAM_GR_CONFIG */
+/* ========================================================================= */
+#define UVH_GR0_GAM_GR_CONFIG ( \
+ is_uv(UV5) ? 0x600028UL : \
+ is_uv(UV4) ? 0x600028UL : \
+ is_uv(UV3) ? 0xc00028UL : \
+ is_uv(UV2) ? 0xc00028UL : \
+ 0)
+
+
+
+/* UVYH common defines */
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV4 unique defines */
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV3 unique defines */
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+
+/* UV2 unique defines */
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_SHFT 0
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_MASK 0x000000000000000fUL
+
+
+union uvyh_gr0_gam_gr_config_u {
+ unsigned long v;
+
+ /* UVYH common struct */
+ struct uvyh_gr0_gam_gr_config_s {
+ unsigned long rsvd_0_9:10;
+ unsigned long subspace:1; /* RW */
+ unsigned long rsvd_11_63:53;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_gr0_gam_gr_config_s {
+ unsigned long rsvd_0_9:10;
+ unsigned long subspace:1; /* RW */
+ unsigned long rsvd_11_63:53;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_gr0_gam_gr_config_s {
+ unsigned long rsvd_0_9:10;
+ unsigned long subspace:1; /* RW */
+ unsigned long rsvd_11_63:53;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_gr0_gam_gr_config_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long undef_6_9:4; /* Undefined */
+ unsigned long subspace:1; /* RW */
+ unsigned long reserved:53;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_gr0_gam_gr_config_s {
+ unsigned long n_gr:4; /* RW */
+ unsigned long reserved:60;
+ } s2;
};
/* ========================================================================= */
/* UVH_GR0_TLB_INT0_CONFIG */
/* ========================================================================= */
-#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
-
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+#define UVH_GR0_TLB_INT0_CONFIG ( \
+ is_uv(UV4) ? 0x61b00UL : \
+ is_uv(UV3) ? 0x61b00UL : \
+ is_uv(UV2) ? 0x61b00UL : \
+ uv_undefined("UVH_GR0_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT 13
+#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT 15
+#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT 16
+#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr0_tlb_int0_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr0_tlb_int0_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -642,33 +2289,97 @@ union uvh_gr0_tlb_int0_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
+
+ /* UVXH common struct */
+ struct uvxh_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_gr0_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s2;
};
/* ========================================================================= */
/* UVH_GR0_TLB_INT1_CONFIG */
/* ========================================================================= */
-#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
-
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
-#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
-#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+#define UVH_GR0_TLB_INT1_CONFIG ( \
+ is_uv(UV4) ? 0x61b40UL : \
+ is_uv(UV3) ? 0x61b40UL : \
+ is_uv(UV2) ? 0x61b40UL : \
+ uv_undefined("UVH_GR0_TLB_INT1_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT 13
+#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT 15
+#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT 16
+#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr0_tlb_int1_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr0_tlb_int1_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -681,382 +2392,97 @@ union uvh_gr0_tlb_int1_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
-};
-/* ========================================================================= */
-/* UVH_GR0_TLB_MMR_CONTROL */
-/* ========================================================================= */
-#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL
-#define UVH_GR0_TLB_MMR_CONTROL ( \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL)
-
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-
-#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52
-#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
-
-#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 13
-#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59
-#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL
-#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL
-
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK ( \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK)
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK ( \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK)
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT ( \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT)
-
-union uvh_gr0_tlb_mmr_control_u {
- unsigned long v;
- struct uvh_gr0_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_48:17;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
- } s;
- struct uvxh_gr0_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long rsvd_48:1;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
+ /* UVXH common struct */
+ struct uvxh_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} sx;
- struct uv2h_gr0_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long mmr_inj_con:1; /* RW */
- unsigned long rsvd_49_51:3;
- unsigned long mmr_inj_tlbram:1; /* RW */
- unsigned long rsvd_53_63:11;
- } s2;
- struct uv3h_gr0_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long undef_52:1; /* Undefined */
- unsigned long rsvd_53_63:11;
- } s3;
- struct uv4h_gr0_tlb_mmr_control_s {
- unsigned long index:13; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_15:1;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_58:7;
- unsigned long page_size:5; /* RW */
+
+ /* UV4 unique struct */
+ struct uv4h_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s4;
-};
-/* ========================================================================= */
-/* UVH_GR0_TLB_MMR_READ_DATA_HI */
-/* ========================================================================= */
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI ( \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI)
-
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT 34
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 49
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-
-union uvh_gr0_tlb_mmr_read_data_hi_u {
- unsigned long v;
- struct uv2h_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s2;
- struct uv3h_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_46_54:9; /* Undefined */
- unsigned long way_ecc:9; /* RO */
+ /* UV3 unique struct */
+ struct uv3h_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s3;
- struct uv4h_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:34; /* RO */
- unsigned long pnid:15; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_54:1; /* Undefined */
- unsigned long way_ecc:9; /* RO */
- } s4;
-};
-/* ========================================================================= */
-/* UVH_GR0_TLB_MMR_READ_DATA_LO */
-/* ========================================================================= */
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO ( \
- is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \
- is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_LO : \
- /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO)
-
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-
-union uvh_gr0_tlb_mmr_read_data_lo_u {
- unsigned long v;
- struct uvh_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s;
- struct uvxh_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } sx;
- struct uv2h_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
+ /* UV2 unique struct */
+ struct uv2h_gr0_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s2;
- struct uv3h_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s3;
- struct uv4h_gr0_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s4;
};
/* ========================================================================= */
/* UVH_GR1_TLB_INT0_CONFIG */
/* ========================================================================= */
-#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL
-#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL
-#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL
#define UVH_GR1_TLB_INT0_CONFIG ( \
- is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG : \
- is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG)
-
-#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
-#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
-#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
-#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13
-#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15
-#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16
-#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+ is_uv(UV4) ? 0x62100UL : \
+ is_uv(UV3) ? 0x61f00UL : \
+ is_uv(UV2) ? 0x61f00UL : \
+ uv_undefined("UVH_GR1_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
+#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
+#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT 13
+#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT 15
+#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT 16
+#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr1_tlb_int0_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr1_tlb_int0_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -1069,39 +2495,97 @@ union uvh_gr1_tlb_int0_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
+
+ /* UVXH common struct */
+ struct uvxh_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_gr1_tlb_int0_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s2;
};
/* ========================================================================= */
/* UVH_GR1_TLB_INT1_CONFIG */
/* ========================================================================= */
-#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL
#define UVH_GR1_TLB_INT1_CONFIG ( \
- is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG : \
- is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG)
-
-#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
-#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13
-#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15
-#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16
-#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
-#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
-#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
-#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+ is_uv(UV4) ? 0x62140UL : \
+ is_uv(UV3) ? 0x61f40UL : \
+ is_uv(UV2) ? 0x61f40UL : \
+ uv_undefined("UVH_GR1_TLB_INT1_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
+#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
+#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
+#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
+#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT 13
+#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT 15
+#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT 16
+#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
+#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_gr1_tlb_int1_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_gr1_tlb_int1_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -1114,337 +2598,62 @@ union uvh_gr1_tlb_int1_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
-};
-/* ========================================================================= */
-/* UVH_GR1_TLB_MMR_CONTROL */
-/* ========================================================================= */
-#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL
-#define UVH_GR1_TLB_MMR_CONTROL ( \
- is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \
- is_uv3_hub() ? UV3H_GR1_TLB_MMR_CONTROL : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL)
-
-#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-
-#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT 48
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT 52
-#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL
-
-#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
-#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-
-#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 13
-#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
-#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59
-#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL
-#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL
-
-
-union uvh_gr1_tlb_mmr_control_u {
- unsigned long v;
- struct uvh_gr1_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long rsvd_32_48:17;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
- } s;
- struct uvxh_gr1_tlb_mmr_control_s {
- unsigned long rsvd_0_15:16;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long rsvd_48:1;
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_63:12;
+ /* UVXH common struct */
+ struct uvxh_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} sx;
- struct uv2h_gr1_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long rsvd_21_29:9;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long mmr_inj_con:1; /* RW */
- unsigned long rsvd_49_51:3;
- unsigned long mmr_inj_tlbram:1; /* RW */
- unsigned long rsvd_53_63:11;
- } s2;
- struct uv3h_gr1_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long undef_52:1; /* Undefined */
- unsigned long rsvd_53_63:11;
- } s3;
- struct uv4h_gr1_tlb_mmr_control_s {
- unsigned long index:13; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_15:1;
- unsigned long auto_valid_en:1; /* RW */
- unsigned long rsvd_17_19:3;
- unsigned long mmr_hash_index_en:1; /* RW */
- unsigned long ecc_sel:1; /* RW */
- unsigned long rsvd_22_29:8;
- unsigned long mmr_write:1; /* WP */
- unsigned long mmr_read:1; /* WP */
- unsigned long mmr_op_done:1; /* RW */
- unsigned long rsvd_33_47:15;
- unsigned long undef_48:1; /* Undefined */
- unsigned long rsvd_49_51:3;
- unsigned long rsvd_52_58:7;
- unsigned long page_size:5; /* RW */
+
+ /* UV4 unique struct */
+ struct uv4h_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s4;
-};
-/* ========================================================================= */
-/* UVH_GR1_TLB_MMR_READ_DATA_HI */
-/* ========================================================================= */
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI ( \
- is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \
- is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI)
-
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT 34
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 49
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
-
-
-union uvh_gr1_tlb_mmr_read_data_hi_u {
- unsigned long v;
- struct uv2h_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s2;
- struct uv3h_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_46_54:9; /* Undefined */
- unsigned long way_ecc:9; /* RO */
+ /* UV3 unique struct */
+ struct uv3h_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s3;
- struct uv4h_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:34; /* RO */
- unsigned long pnid:15; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long aa_ext:1; /* RO */
- unsigned long undef_54:1; /* Undefined */
- unsigned long way_ecc:9; /* RO */
- } s4;
-};
-/* ========================================================================= */
-/* UVH_GR1_TLB_MMR_READ_DATA_LO */
-/* ========================================================================= */
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO ( \
- is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \
- is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO : \
- /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO)
-
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
-
-
-union uvh_gr1_tlb_mmr_read_data_lo_u {
- unsigned long v;
- struct uvh_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s;
- struct uvxh_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } sx;
- struct uv2h_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
+ /* UV2 unique struct */
+ struct uv2h_gr1_tlb_int1_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s2;
- struct uv3h_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s3;
- struct uv4h_gr1_tlb_mmr_read_data_lo_s {
- unsigned long vpn:39; /* RO */
- unsigned long asid:24; /* RO */
- unsigned long valid:1; /* RO */
- } s4;
};
/* ========================================================================= */
@@ -1452,52 +2661,43 @@ union uvh_gr1_tlb_mmr_read_data_lo_u {
/* ========================================================================= */
#define UVH_INT_CMPB 0x22080UL
+/* UVH common defines*/
#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
union uvh_int_cmpb_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_int_cmpb_s {
unsigned long real_time_cmpb:56; /* RW */
unsigned long rsvd_56_63:8;
} s;
-};
-
-/* ========================================================================= */
-/* UVH_INT_CMPC */
-/* ========================================================================= */
-#define UVH_INT_CMPC 0x22100UL
-
-
-#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0
-#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL
-
-union uvh_int_cmpc_u {
- unsigned long v;
- struct uvh_int_cmpc_s {
- unsigned long real_time_cmpc:56; /* RW */
+ /* UV5 unique struct */
+ struct uv5h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
unsigned long rsvd_56_63:8;
- } s;
-};
+ } s5;
-/* ========================================================================= */
-/* UVH_INT_CMPD */
-/* ========================================================================= */
-#define UVH_INT_CMPD 0x22180UL
-
-
-#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0
-#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL
+ /* UV4 unique struct */
+ struct uv4h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s4;
+ /* UV3 unique struct */
+ struct uv3h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s3;
-union uvh_int_cmpd_u {
- unsigned long v;
- struct uvh_int_cmpd_s {
- unsigned long real_time_cmpd:56; /* RW */
+ /* UV2 unique struct */
+ struct uv2h_int_cmpb_s {
+ unsigned long real_time_cmpb:56; /* RW */
unsigned long rsvd_56_63:8;
- } s;
+ } s2;
};
/* ========================================================================= */
@@ -1505,28 +2705,23 @@ union uvh_int_cmpd_u {
/* ========================================================================= */
#define UVH_IPI_INT 0x60500UL
-#define UV2H_IPI_INT_32 0x348
-#define UV3H_IPI_INT_32 0x348
-#define UV4H_IPI_INT_32 0x268
-#define UVH_IPI_INT_32 ( \
- is_uv2_hub() ? UV2H_IPI_INT_32 : \
- is_uv3_hub() ? UV3H_IPI_INT_32 : \
- /*is_uv4_hub*/ UV4H_IPI_INT_32)
-
+/* UVH common defines*/
#define UVH_IPI_INT_VECTOR_SHFT 0
-#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
-#define UVH_IPI_INT_DESTMODE_SHFT 11
-#define UVH_IPI_INT_APIC_ID_SHFT 16
-#define UVH_IPI_INT_SEND_SHFT 63
#define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
#define UVH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL
+#define UVH_IPI_INT_DESTMODE_SHFT 11
#define UVH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_IPI_INT_APIC_ID_SHFT 16
#define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL
+#define UVH_IPI_INT_SEND_SHFT 63
#define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL
union uvh_ipi_int_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_ipi_int_s {
unsigned long vector_:8; /* RW */
unsigned long delivery_mode:3; /* RW */
@@ -1536,903 +2731,105 @@ union uvh_ipi_int_u {
unsigned long rsvd_48_62:15;
unsigned long send:1; /* WP */
} s;
-};
-
-/* ========================================================================= */
-/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST ( \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
-
-
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
-
-
-union uvh_lb_bau_intd_payload_queue_first_u {
- unsigned long v;
- struct uv2h_lb_bau_intd_payload_queue_first_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_48:6;
- unsigned long node_id:14; /* RW */
- unsigned long rsvd_63:1;
- } s2;
- struct uv3h_lb_bau_intd_payload_queue_first_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_48:6;
- unsigned long node_id:14; /* RW */
- unsigned long rsvd_63:1;
- } s3;
-};
-
-/* ========================================================================= */
-/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST ( \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
-
-
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
-
-
-union uvh_lb_bau_intd_payload_queue_last_u {
- unsigned long v;
- struct uv2h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s2;
- struct uv3h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s3;
-};
-
-/* ========================================================================= */
-/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL ( \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
-
-
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+ /* UV5 unique struct */
+ struct uv5h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
+ } s5;
-union uvh_lb_bau_intd_payload_queue_tail_u {
- unsigned long v;
- struct uv2h_lb_bau_intd_payload_queue_tail_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s2;
- struct uv3h_lb_bau_intd_payload_queue_tail_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s3;
-};
+ /* UV4 unique struct */
+ struct uv4h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
+ } s4;
-/* ========================================================================= */
-/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE")
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE ( \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE)
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
-
-
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
-
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
-
-
-union uvh_lb_bau_intd_software_acknowledge_u {
- unsigned long v;
- struct uv2h_lb_bau_intd_software_acknowledge_s {
- unsigned long pending_0:1; /* RW */
- unsigned long pending_1:1; /* RW */
- unsigned long pending_2:1; /* RW */
- unsigned long pending_3:1; /* RW */
- unsigned long pending_4:1; /* RW */
- unsigned long pending_5:1; /* RW */
- unsigned long pending_6:1; /* RW */
- unsigned long pending_7:1; /* RW */
- unsigned long timeout_0:1; /* RW */
- unsigned long timeout_1:1; /* RW */
- unsigned long timeout_2:1; /* RW */
- unsigned long timeout_3:1; /* RW */
- unsigned long timeout_4:1; /* RW */
- unsigned long timeout_5:1; /* RW */
- unsigned long timeout_6:1; /* RW */
- unsigned long timeout_7:1; /* RW */
- unsigned long rsvd_16_63:48;
- } s2;
- struct uv3h_lb_bau_intd_software_acknowledge_s {
- unsigned long pending_0:1; /* RW */
- unsigned long pending_1:1; /* RW */
- unsigned long pending_2:1; /* RW */
- unsigned long pending_3:1; /* RW */
- unsigned long pending_4:1; /* RW */
- unsigned long pending_5:1; /* RW */
- unsigned long pending_6:1; /* RW */
- unsigned long pending_7:1; /* RW */
- unsigned long timeout_0:1; /* RW */
- unsigned long timeout_1:1; /* RW */
- unsigned long timeout_2:1; /* RW */
- unsigned long timeout_3:1; /* RW */
- unsigned long timeout_4:1; /* RW */
- unsigned long timeout_5:1; /* RW */
- unsigned long timeout_6:1; /* RW */
- unsigned long timeout_7:1; /* RW */
- unsigned long rsvd_16_63:48;
+ /* UV3 unique struct */
+ struct uv3h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
} s3;
-};
-/* ========================================================================= */
-/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS")
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS ( \
- is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
- is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
- /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS)
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
-
-
-/* ========================================================================= */
-/* UVH_LB_BAU_MISC_CONTROL */
-/* ========================================================================= */
-#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL
-#define UVH_LB_BAU_MISC_CONTROL ( \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL)
-
-#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18
-#define UVH_LB_BAU_MISC_CONTROL_32 ( \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32)
-
-#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37
-#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
-#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
-#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT 15
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT 37
-#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
-#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46
-#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
-#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
-#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK 0x00000000000f8000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK 0x0000002000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
-
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK")
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK ( \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK)
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT")
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT ( \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT)
-#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK")
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK ( \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK)
-#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT \
- uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT")
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT ( \
- is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
- is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
- /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT)
-
-union uvh_lb_bau_misc_control_u {
- unsigned long v;
- struct uvh_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long rsvd_15_19:5;
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long rsvd_29_47:19;
- unsigned long fun:16; /* RW */
- } s;
- struct uvxh_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long rsvd_15_19:5;
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long rsvd_36_47:12;
- unsigned long fun:16; /* RW */
- } sx;
- struct uv2h_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long enable_intd_soft_ack_mode:1; /* RW */
- unsigned long intd_soft_ack_timeout_period:4; /* RW */
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long rsvd_36_47:12;
- unsigned long fun:16; /* RW */
+ /* UV2 unique struct */
+ struct uv2h_ipi_int_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long delivery_mode:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long rsvd_12_15:4;
+ unsigned long apic_id:32; /* RW */
+ unsigned long rsvd_48_62:15;
+ unsigned long send:1; /* WP */
} s2;
- struct uv3h_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long enable_intd_soft_ack_mode:1; /* RW */
- unsigned long intd_soft_ack_timeout_period:4; /* RW */
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
- unsigned long enable_intd_prefetch_hint:1; /* RW */
- unsigned long thread_kill_timebase:8; /* RW */
- unsigned long rsvd_46_47:2;
- unsigned long fun:16; /* RW */
- } s3;
- struct uv4h_lb_bau_misc_control_s {
- unsigned long rejection_delay:8; /* RW */
- unsigned long apic_mode:1; /* RW */
- unsigned long force_broadcast:1; /* RW */
- unsigned long force_lock_nop:1; /* RW */
- unsigned long qpi_agent_presence_vector:3; /* RW */
- unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long rsvd_15_19:5;
- unsigned long enable_dual_mapping_mode:1; /* RW */
- unsigned long vga_io_port_decode_enable:1; /* RW */
- unsigned long vga_io_port_16_bit_decode:1; /* RW */
- unsigned long suppress_dest_registration:1; /* RW */
- unsigned long programmed_initial_priority:3; /* RW */
- unsigned long use_incoming_priority:1; /* RW */
- unsigned long enable_programmed_initial_priority:1;/* RW */
- unsigned long enable_automatic_apic_mode_selection:1;/* RW */
- unsigned long apic_mode_status:1; /* RO */
- unsigned long suppress_interrupts_to_self:1; /* RW */
- unsigned long enable_lock_based_system_flush:1;/* RW */
- unsigned long enable_extended_sb_status:1; /* RW */
- unsigned long suppress_int_prio_udt_to_self:1;/* RW */
- unsigned long use_legacy_descriptor_formats:1;/* RW */
- unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
- unsigned long rsvd_37:1;
- unsigned long thread_kill_timebase:8; /* RW */
- unsigned long address_interleave_select:1; /* RW */
- unsigned long rsvd_47:1;
- unsigned long fun:16; /* RW */
- } s4;
-};
-
-/* ========================================================================= */
-/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL)
-
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL
-
-
-union uvh_lb_bau_sb_activation_control_u {
- unsigned long v;
- struct uvh_lb_bau_sb_activation_control_s {
- unsigned long index:6; /* RW */
- unsigned long rsvd_6_61:56;
- unsigned long push:1; /* WP */
- unsigned long init:1; /* WP */
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0)
-
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
-
-
-union uvh_lb_bau_sb_activation_status_0_u {
- unsigned long v;
- struct uvh_lb_bau_sb_activation_status_0_s {
- unsigned long status:64; /* RW */
- } s;
-};
-
-/* ========================================================================= */
-/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1)
-
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
-
-
-union uvh_lb_bau_sb_activation_status_1_u {
- unsigned long v;
- struct uvh_lb_bau_sb_activation_status_1_s {
- unsigned long status:64; /* RW */
- } s;
};
/* ========================================================================= */
-/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE)
-
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
-
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 53
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0xffe0000000000000UL
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
- is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
- is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
- is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK)
-
-/* ========================================================================= */
/* UVH_NODE_ID */
/* ========================================================================= */
#define UVH_NODE_ID 0x0UL
-#define UV2H_NODE_ID 0x0UL
-#define UV3H_NODE_ID 0x0UL
-#define UV4H_NODE_ID 0x0UL
+/* UVH common defines*/
#define UVH_NODE_ID_FORCE1_SHFT 0
-#define UVH_NODE_ID_MANUFACTURER_SHFT 1
-#define UVH_NODE_ID_PART_NUMBER_SHFT 12
-#define UVH_NODE_ID_REVISION_SHFT 28
-#define UVH_NODE_ID_NODE_ID_SHFT 32
#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UVH_NODE_ID_MANUFACTURER_SHFT 1
#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UVH_NODE_ID_PART_NUMBER_SHFT 12
#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UVH_NODE_ID_REVISION_SHFT 28
#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UVH_NODE_ID_NODE_ID_SHFT 32
+#define UVH_NODE_ID_NI_PORT_SHFT 57
-#define UVXH_NODE_ID_FORCE1_SHFT 0
-#define UVXH_NODE_ID_MANUFACTURER_SHFT 1
-#define UVXH_NODE_ID_PART_NUMBER_SHFT 12
-#define UVXH_NODE_ID_REVISION_SHFT 28
-#define UVXH_NODE_ID_NODE_ID_SHFT 32
-#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UVXH_NODE_ID_NI_PORT_SHFT 57
-#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+/* UVXH common defines */
#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50
#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
-#define UV2H_NODE_ID_FORCE1_SHFT 0
-#define UV2H_NODE_ID_MANUFACTURER_SHFT 1
-#define UV2H_NODE_ID_PART_NUMBER_SHFT 12
-#define UV2H_NODE_ID_REVISION_SHFT 28
-#define UV2H_NODE_ID_NODE_ID_SHFT 32
-#define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UV2H_NODE_ID_NI_PORT_SHFT 57
-#define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
-#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
-
-#define UV3H_NODE_ID_FORCE1_SHFT 0
-#define UV3H_NODE_ID_MANUFACTURER_SHFT 1
-#define UV3H_NODE_ID_PART_NUMBER_SHFT 12
-#define UV3H_NODE_ID_REVISION_SHFT 28
-#define UV3H_NODE_ID_NODE_ID_SHFT 32
-#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48
-#define UV3H_NODE_ID_RESERVED_2_SHFT 49
-#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UV3H_NODE_ID_NI_PORT_SHFT 57
-#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
-#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
-#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
-#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
-#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
-
-#define UV4H_NODE_ID_FORCE1_SHFT 0
-#define UV4H_NODE_ID_MANUFACTURER_SHFT 1
-#define UV4H_NODE_ID_PART_NUMBER_SHFT 12
-#define UV4H_NODE_ID_REVISION_SHFT 28
-#define UV4H_NODE_ID_NODE_ID_SHFT 32
+/* UVYH common defines */
+#define UVYH_NODE_ID_NODE_ID_MASK 0x0000007f00000000UL
+#define UVYH_NODE_ID_NI_PORT_MASK 0x7e00000000000000UL
+
+/* UV4 unique defines */
#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48
-#define UV4H_NODE_ID_RESERVED_2_SHFT 49
-#define UV4H_NODE_ID_NODES_PER_BIT_SHFT 50
-#define UV4H_NODE_ID_NI_PORT_SHFT 57
-#define UV4H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
-#define UV4H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
-#define UV4H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
-#define UV4H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
-#define UV4H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
+#define UV4H_NODE_ID_RESERVED_2_SHFT 49
#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
-#define UV4H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
-#define UV4H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48
+#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
+#define UV3H_NODE_ID_RESERVED_2_SHFT 49
+#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
union uvh_node_id_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_node_id_s {
unsigned long force1:1; /* RO */
unsigned long manufacturer:11; /* RO */
unsigned long part_number:16; /* RO */
unsigned long revision:4; /* RO */
- unsigned long node_id:15; /* RW */
- unsigned long rsvd_47_63:17;
+ unsigned long rsvd_32_63:32;
} s;
+
+ /* UVXH common struct */
struct uvxh_node_id_s {
unsigned long force1:1; /* RO */
unsigned long manufacturer:11; /* RO */
@@ -2444,17 +2841,47 @@ union uvh_node_id_u {
unsigned long ni_port:5; /* RO */
unsigned long rsvd_62_63:2;
} sx;
- struct uv2h_node_id_s {
+
+ /* UVYH common struct */
+ struct uvyh_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:7; /* RW */
+ unsigned long rsvd_39_56:18;
+ unsigned long ni_port:6; /* RO */
+ unsigned long rsvd_63:1;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:7; /* RW */
+ unsigned long rsvd_39_56:18;
+ unsigned long ni_port:6; /* RO */
+ unsigned long rsvd_63:1;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_node_id_s {
unsigned long force1:1; /* RO */
unsigned long manufacturer:11; /* RO */
unsigned long part_number:16; /* RO */
unsigned long revision:4; /* RO */
unsigned long node_id:15; /* RW */
- unsigned long rsvd_47_49:3;
+ unsigned long rsvd_47:1;
+ unsigned long router_select:1; /* RO */
+ unsigned long rsvd_49:1;
unsigned long nodes_per_bit:7; /* RO */
unsigned long ni_port:5; /* RO */
unsigned long rsvd_62_63:2;
- } s2;
+ } s4;
+
+ /* UV3 unique struct */
struct uv3h_node_id_s {
unsigned long force1:1; /* RO */
unsigned long manufacturer:11; /* RO */
@@ -2468,186 +2895,569 @@ union uvh_node_id_u {
unsigned long ni_port:5; /* RO */
unsigned long rsvd_62_63:2;
} s3;
- struct uv4h_node_id_s {
+
+ /* UV2 unique struct */
+ struct uv2h_node_id_s {
unsigned long force1:1; /* RO */
unsigned long manufacturer:11; /* RO */
unsigned long part_number:16; /* RO */
unsigned long revision:4; /* RO */
unsigned long node_id:15; /* RW */
- unsigned long rsvd_47:1;
- unsigned long router_select:1; /* RO */
- unsigned long rsvd_49:1;
+ unsigned long rsvd_47_49:3;
unsigned long nodes_per_bit:7; /* RO */
unsigned long ni_port:5; /* RO */
unsigned long rsvd_62_63:2;
- } s4;
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_NODE_PRESENT_0 */
+/* ========================================================================= */
+#define UVH_NODE_PRESENT_0 ( \
+ is_uv(UV5) ? 0x1400UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_NODE_PRESENT_0_NODES_SHFT 0
+#define UVYH_NODE_PRESENT_0_NODES_MASK 0xffffffffffffffffUL
+
+
+union uvh_node_present_0_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_node_present_0_s {
+ unsigned long nodes:64; /* RW */
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_node_present_0_s {
+ unsigned long nodes:64; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_node_present_0_s {
+ unsigned long nodes:64; /* RW */
+ } s5;
+};
+
+/* ========================================================================= */
+/* UVH_NODE_PRESENT_1 */
+/* ========================================================================= */
+#define UVH_NODE_PRESENT_1 ( \
+ is_uv(UV5) ? 0x1408UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_NODE_PRESENT_1_NODES_SHFT 0
+#define UVYH_NODE_PRESENT_1_NODES_MASK 0xffffffffffffffffUL
+
+
+union uvh_node_present_1_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_node_present_1_s {
+ unsigned long nodes:64; /* RW */
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_node_present_1_s {
+ unsigned long nodes:64; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_node_present_1_s {
+ unsigned long nodes:64; /* RW */
+ } s5;
};
/* ========================================================================= */
/* UVH_NODE_PRESENT_TABLE */
/* ========================================================================= */
-#define UVH_NODE_PRESENT_TABLE 0x1400UL
+#define UVH_NODE_PRESENT_TABLE ( \
+ is_uv(UV4) ? 0x1400UL : \
+ is_uv(UV3) ? 0x1400UL : \
+ is_uv(UV2) ? 0x1400UL : \
+ 0)
-#define UV2H_NODE_PRESENT_TABLE_DEPTH 16
-#define UV3H_NODE_PRESENT_TABLE_DEPTH 16
-#define UV4H_NODE_PRESENT_TABLE_DEPTH 4
#define UVH_NODE_PRESENT_TABLE_DEPTH ( \
- is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH : \
- is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH : \
- /*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH)
+ is_uv(UV4) ? 4 : \
+ is_uv(UV3) ? 16 : \
+ is_uv(UV2) ? 16 : \
+ 0)
-#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
-#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
+
+/* UVXH common defines */
+#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT 0
+#define UVXH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
union uvh_node_present_table_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_node_present_table_s {
unsigned long nodes:64; /* RW */
} s;
+
+ /* UVXH common struct */
+ struct uvxh_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_node_present_table_s {
+ unsigned long nodes:64; /* RW */
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_RH10_GAM_ADDR_MAP_CONFIG */
+/* ========================================================================= */
+#define UVH_RH10_GAM_ADDR_MAP_CONFIG ( \
+ is_uv(UV5) ? 0x470000UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000001c0UL
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_SHFT 12
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_MASK 0x0000000000001000UL
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_SHFT 16
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_MASK 0x00000000000f0000UL
+
+
+union uvh_rh10_gam_addr_map_config_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_addr_map_config_s {
+ unsigned long undef_0_5:6; /* Undefined */
+ unsigned long n_skt:3; /* RW */
+ unsigned long undef_9_11:3; /* Undefined */
+ unsigned long ls_enable:1; /* RW */
+ unsigned long undef_13_15:3; /* Undefined */
+ unsigned long mk_tme_keyid_bits:4; /* RW */
+ unsigned long rsvd_20_63:44;
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_addr_map_config_s {
+ unsigned long undef_0_5:6; /* Undefined */
+ unsigned long n_skt:3; /* RW */
+ unsigned long undef_9_11:3; /* Undefined */
+ unsigned long ls_enable:1; /* RW */
+ unsigned long undef_13_15:3; /* Undefined */
+ unsigned long mk_tme_keyid_bits:4; /* RW */
+ unsigned long rsvd_20_63:44;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_addr_map_config_s {
+ unsigned long undef_0_5:6; /* Undefined */
+ unsigned long n_skt:3; /* RW */
+ unsigned long undef_9_11:3; /* Undefined */
+ unsigned long ls_enable:1; /* RW */
+ unsigned long undef_13_15:3; /* Undefined */
+ unsigned long mk_tme_keyid_bits:4; /* RW */
+ } s5;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
+/* UVH_RH10_GAM_GRU_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG ( \
+ is_uv(UV5) ? 0x4700b0UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 25
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x0070000000000000UL
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffe000000UL : \
+ 0)
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV5) ? 25 : \
+ -1)
+
+union uvh_rh10_gam_gru_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_overlay_config_0_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_gru_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long n_gru:3; /* RW */
+ unsigned long undef_55_62:8; /* Undefined */
unsigned long enable:1; /* RW */
} s;
- struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_gru_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long n_gru:3; /* RW */
+ unsigned long undef_55_62:8; /* Undefined */
unsigned long enable:1; /* RW */
- } sx;
- struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_gru_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long n_gru:3; /* RW */
+ unsigned long undef_55_62:8; /* Undefined */
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
+ } s5;
+};
+
+/* ========================================================================= */
+/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 */
+/* ========================================================================= */
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 ( \
+ is_uv(UV5) ? 0x473000UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffc000000UL : \
+ 0)
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \
+ is_uv(UV5) ? 26 : \
+ -1)
+
+union uvh_rh10_gam_mmioh_overlay_config0_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
unsigned long enable:1; /* RW */
- } s3;
- struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s {
- unsigned long rsvd_0_23:24;
- unsigned long base:8; /* RW */
- unsigned long rsvd_32_47:16;
- unsigned long m_alias:5; /* RW */
- unsigned long rsvd_53_62:10;
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
unsigned long enable:1; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s5;
+};
+
+/* ========================================================================= */
+/* UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 */
+/* ========================================================================= */
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 ( \
+ is_uv(UV5) ? 0x474000UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffc000000UL : \
+ 0)
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \
+ is_uv(UV5) ? 26 : \
+ -1)
+
+union uvh_rh10_gam_mmioh_overlay_config1_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:26; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long undef_62:1; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s5;
+};
+
+/* ========================================================================= */
+/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 */
+/* ========================================================================= */
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 ( \
+ is_uv(UV5) ? 0x473800UL : \
+ 0)
+
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \
+ is_uv(UV5) ? 128 : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x000000000000007fUL
+
+
+union uvh_rh10_gam_mmioh_redirect_config0_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } s5;
+};
+
+/* ========================================================================= */
+/* UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 */
+/* ========================================================================= */
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 ( \
+ is_uv(UV5) ? 0x474800UL : \
+ 0)
+
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \
+ is_uv(UV5) ? 128 : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x000000000000007fUL
+
+
+union uvh_rh10_gam_mmioh_redirect_config1_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:7; /* RW */
+ unsigned long rsvd_7_63:57;
+ } s5;
+};
+
+/* ========================================================================= */
+/* UVH_RH10_GAM_MMR_OVERLAY_CONFIG */
+/* ========================================================================= */
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG ( \
+ is_uv(UV5) ? 0x470090UL : \
+ 0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 25
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK 0x000ffffffe000000UL
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV5) ? 0x000ffffffe000000UL : \
+ 0)
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV5) ? 25 : \
+ -1)
+
+union uvh_rh10_gam_mmr_overlay_config_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh10_gam_mmr_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long undef_52_62:11; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s;
+
+ /* UVYH common struct */
+ struct uvyh_rh10_gam_mmr_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long undef_52_62:11; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } sy;
+
+ /* UV5 unique struct */
+ struct uv5h_rh10_gam_mmr_overlay_config_s {
+ unsigned long undef_0_24:25; /* Undefined */
+ unsigned long base:27; /* RW */
+ unsigned long undef_52_62:11; /* Undefined */
+ unsigned long enable:1; /* RW */
+ } s5;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_ADDR_MAP_CONFIG */
+/* ========================================================================= */
+#define UVH_RH_GAM_ADDR_MAP_CONFIG ( \
+ is_uv(UV4) ? 0x480000UL : \
+ is_uv(UV3) ? 0x1600000UL : \
+ is_uv(UV2) ? 0x1600000UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT 6
+#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_MASK 0x00000000000003c0UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0
+#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
+
+/* UV2 unique defines */
+#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT 0
+#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL
+
+
+union uvh_rh_gam_addr_map_config_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh_gam_addr_map_config_s {
+ unsigned long rsvd_0_5:6;
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_addr_map_config_s {
+ unsigned long rsvd_0_5:6;
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_addr_map_config_s {
+ unsigned long rsvd_0_5:6;
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
} s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_addr_map_config_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_addr_map_config_s {
+ unsigned long m_skt:6; /* RW */
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
+/* UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
+#define UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x4800c8UL : \
+ is_uv(UV3) ? 0x16000c8UL : \
+ is_uv(UV2) ? 0x16000c8UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+
+union uvh_rh_gam_alias_0_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -2655,7 +3465,9 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s;
- struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -2663,15 +3475,19 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -2679,66 +3495,96 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_0_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG */
+/* ========================================================================= */
+#define UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG ( \
+ is_uv(UV4) ? 0x4800d0UL : \
+ is_uv(UV3) ? 0x16000d0UL : \
+ is_uv(UV2) ? 0x16000d0UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
+
+
+union uvh_rh_gam_alias_0_redirect_config_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_0_redirect_config_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_0_redirect_config_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } sx;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_0_redirect_config_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
} s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_0_redirect_config_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_0_redirect_config_s {
+ unsigned long rsvd_0_23:24;
+ unsigned long dest_base:22; /* RW */
+ unsigned long rsvd_46_63:18;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
+/* UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
+#define UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x4800d8UL : \
+ is_uv(UV3) ? 0x16000d8UL : \
+ is_uv(UV2) ? 0x16000d8UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+
+union uvh_rh_gam_alias_1_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -2746,7 +3592,9 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s;
- struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -2754,15 +3602,19 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
@@ -2770,321 +3622,289 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_1_overlay_config_s {
unsigned long rsvd_0_23:24;
unsigned long base:8; /* RW */
unsigned long rsvd_32_47:16;
unsigned long m_alias:5; /* RW */
unsigned long rsvd_53_62:10;
unsigned long enable:1; /* RW */
- } s4;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
+/* UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR)
-
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG ( \
+ is_uv(UV4) ? 0x4800e0UL : \
+ is_uv(UV3) ? 0x16000e0UL : \
+ is_uv(UV2) ? 0x16000e0UL : \
+ 0)
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
-union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
+union uvh_rh_gam_alias_1_redirect_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s;
- struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} sx;
- struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s2;
- struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s3;
- struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_1_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s4;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */
+/* UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR)
-
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x4800e8UL : \
+ is_uv(UV3) ? 0x16000e8UL : \
+ is_uv(UV2) ? 0x16000e8UL : \
+ 0)
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
+union uvh_rh_gam_alias_2_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
} s;
- struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
- } s2;
- struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s {
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_alias210_redirect_config_1_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_2_overlay_config_s {
unsigned long rsvd_0_23:24;
- unsigned long dest_base:22; /* RW */
- unsigned long rsvd_46_63:18;
- } s4;
+ unsigned long base:8; /* RW */
+ unsigned long rsvd_32_47:16;
+ unsigned long m_alias:5; /* RW */
+ unsigned long rsvd_53_62:10;
+ unsigned long enable:1; /* RW */
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */
+/* UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR)
-
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG ( \
+ is_uv(UV4) ? 0x4800f0UL : \
+ is_uv(UV3) ? 0x16000f0UL : \
+ is_uv(UV2) ? 0x16000f0UL : \
+ 0)
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
-union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
+union uvh_rh_gam_alias_2_redirect_config_u {
unsigned long v;
- struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s;
- struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} sx;
- struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s2;
- struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
} s3;
- struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_alias_2_redirect_config_s {
unsigned long rsvd_0_23:24;
unsigned long dest_base:22; /* RW */
unsigned long rsvd_46_63:18;
- } s4;
-};
-
-/* ========================================================================= */
-/* UVH_RH_GAM_CONFIG_MMR */
-/* ========================================================================= */
-#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
-#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL
-#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL
-#define UVH_RH_GAM_CONFIG_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR)
-
-#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
-#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
-#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
-#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
-#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
-
-
-union uvh_rh_gam_config_mmr_u {
- unsigned long v;
- struct uvh_rh_gam_config_mmr_s {
- unsigned long rsvd_0_5:6;
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } s;
- struct uvxh_rh_gam_config_mmr_s {
- unsigned long rsvd_0_5:6;
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } sx;
- struct uv2h_rh_gam_config_mmr_s {
- unsigned long m_skt:6; /* RW */
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
} s2;
- struct uv3h_rh_gam_config_mmr_s {
- unsigned long m_skt:6; /* RW */
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } s3;
- struct uv4h_rh_gam_config_mmr_s {
- unsigned long rsvd_0_5:6;
- unsigned long n_skt:4; /* RW */
- unsigned long rsvd_10_63:54;
- } s4;
};
/* ========================================================================= */
-/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
+/* UVH_RH_GAM_GRU_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR)
-
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK ( \
- is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
- is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK)
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT ( \
- is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
- is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
- /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT)
-
-union uvh_rh_gam_gru_overlay_config_mmr_u {
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x480010UL : \
+ is_uv(UV3) ? 0x1600010UL : \
+ is_uv(UV2) ? 0x1600010UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT 52
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK 0x00f0000000000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffffc000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_SHFT 62
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_MASK 0x4000000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 28
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x00003ffff0000000UL
+
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffff0000000UL : \
+ is_uv(UV2) ? 0x00003ffff0000000UL : \
+ 0)
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 28 : \
+ is_uv(UV2) ? 28 : \
+ -1)
+
+union uvh_rh_gam_gru_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_51:52;
+
+ /* UVH common struct */
+ struct uvh_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_45:46;
+ unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
} s;
- struct uvxh_rh_gam_gru_overlay_config_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_gru_overlay_config_s {
unsigned long rsvd_0_45:46;
unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_27:28;
- unsigned long base:18; /* RW */
+
+ /* UV4A unique struct */
+ struct uv4ah_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_24:25;
+ unsigned long undef_25:1; /* Undefined */
+ unsigned long base:26; /* RW */
+ unsigned long n_gru:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_24:25;
+ unsigned long undef_25:1; /* Undefined */
+ unsigned long base:20; /* RW */
unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_gru_overlay_config_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_gru_overlay_config_s {
unsigned long rsvd_0_27:28;
unsigned long base:18; /* RW */
unsigned long rsvd_46_51:6;
@@ -3093,86 +3913,141 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
unsigned long mode:1; /* RW */
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_24:25;
- unsigned long undef_25:1; /* Undefined */
- unsigned long base:20; /* RW */
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_gru_overlay_config_s {
+ unsigned long rsvd_0_27:28;
+ unsigned long base:18; /* RW */
unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s4;
+ } s2;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR")
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR)
-
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)
-
-union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG ( \
+ is_uv(UV2) ? 0x1600030UL : \
+ 0)
+
+
+
+/* UV2 unique defines */
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT 27
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_MASK 0x00003ffff8000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_SHFT 46
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_MASK 0x000fc00000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_SHFT 52
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_MASK 0x00f0000000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV2) ? 27 : \
+ uv_undefined("UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT"))
+
+union uvh_rh_gam_mmioh_overlay_config_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_overlay_config_s {
+ unsigned long rsvd_0_26:27;
+ unsigned long base:19; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_overlay_config_s {
+ unsigned long rsvd_0_26:27;
+ unsigned long base:19; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } sx;
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_mmioh_overlay_config_s {
+ unsigned long rsvd_0_26:27;
+ unsigned long base:19; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s2;
+};
+
+/* ========================================================================= */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 ( \
+ is_uv(UV4) ? 0x483000UL : \
+ is_uv(UV3) ? 0x1603000UL : \
+ 0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x03f0000000000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ 0)
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 26 : \
+ -1)
+
+union uvh_rh_gam_mmioh_overlay_config0_u {
+ unsigned long v;
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_overlay_config0_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s3;
- struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_overlay_config0_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s4;
+ } sx;
+
+ /* UV4A unique struct */
struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s {
unsigned long rsvd_0_25:26;
unsigned long base:26; /* RW */
@@ -3181,71 +4056,94 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
unsigned long undef_62:1; /* Undefined */
unsigned long enable:1; /* RW */
} s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_overlay_config0_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
+/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 */
/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR)
-
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
-
-union uvh_rh_gam_mmioh_overlay_config1_mmr_u {
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 ( \
+ is_uv(UV4) ? 0x484000UL : \
+ is_uv(UV3) ? 0x1604000UL : \
+ 0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x03f0000000000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT 26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK 0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT 46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK 0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT 63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ 0)
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 26 : \
+ -1)
+
+union uvh_rh_gam_mmioh_overlay_config1_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_overlay_config1_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s3;
- struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_overlay_config1_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s4;
+ } sx;
+
+ /* UV4A unique struct */
struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s {
unsigned long rsvd_0_25:26;
unsigned long base:26; /* RW */
@@ -3254,232 +4152,275 @@ union uvh_rh_gam_mmioh_overlay_config1_mmr_u {
unsigned long undef_62:1; /* Undefined */
unsigned long enable:1; /* RW */
} s4a;
-};
-/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */
-/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR)
-
-
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_mmioh_overlay_config_mmr_u {
- unsigned long v;
- struct uv2h_rh_gam_mmioh_overlay_config_mmr_s {
- unsigned long rsvd_0_26:27;
- unsigned long base:19; /* RW */
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
unsigned long m_io:6; /* RW */
- unsigned long n_io:4; /* RW */
+ unsigned long n_io:4;
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
- } s2;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_overlay_config1_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long m_io:6; /* RW */
+ unsigned long n_io:4;
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
+/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 */
/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR)
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 ( \
+ is_uv(UV4) ? 0x483800UL : \
+ is_uv(UV3) ? 0x1603800UL : \
+ 0)
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH)
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH ( \
+ is_uv(UV4) ? 128 : \
+ is_uv(UV3) ? 128 : \
+ 0)
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK)
-
-union uvh_rh_gam_mmioh_redirect_config0_mmr_u {
+union uvh_rh_gam_mmioh_redirect_config0_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_redirect_config0_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s3;
- struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_redirect_config0_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s4;
- struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s {
+ } sx;
+
+ struct uv4ah_rh_gam_mmioh_redirect_config0_s {
unsigned long nasid:12; /* RW */
unsigned long rsvd_12_63:52;
} s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_redirect_config0_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
+/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 */
/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR)
-
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH)
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 ( \
+ is_uv(UV4) ? 0x484800UL : \
+ is_uv(UV3) ? 0x1604800UL : \
+ 0)
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH ( \
+ is_uv(UV4) ? 128 : \
+ is_uv(UV3) ? 128 : \
+ 0)
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK ( \
- is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
- is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK)
-union uvh_rh_gam_mmioh_redirect_config1_mmr_u {
+union uvh_rh_gam_mmioh_redirect_config1_u {
unsigned long v;
- struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmioh_redirect_config1_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s3;
- struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s {
+ } s;
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmioh_redirect_config1_s {
unsigned long nasid:15; /* RW */
unsigned long rsvd_15_63:49;
- } s4;
- struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s {
+ } sx;
+
+ struct uv4ah_rh_gam_mmioh_redirect_config1_s {
unsigned long nasid:12; /* RW */
unsigned long rsvd_12_63:52;
} s4a;
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmioh_redirect_config1_s {
+ unsigned long nasid:15; /* RW */
+ unsigned long rsvd_15_63:49;
+ } s3;
};
/* ========================================================================= */
-/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
+/* UVH_RH_GAM_MMR_OVERLAY_CONFIG */
/* ========================================================================= */
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR ( \
- is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
- is_uv3_hub() ? UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
- /*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR)
-
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_mmr_overlay_config_mmr_u {
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG ( \
+ is_uv(UV4) ? 0x480028UL : \
+ is_uv(UV3) ? 0x1600028UL : \
+ is_uv(UV2) ? 0x1600028UL : \
+ 0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT 26
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ is_uv(UV2) ? 0x00003ffffc000000UL : \
+ 0)
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT 63
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT 26
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK 0x000ffffffc000000UL
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK ( \
+ is_uv(UV4A) ? 0x000ffffffc000000UL : \
+ is_uv(UV4) ? 0x00003ffffc000000UL : \
+ is_uv(UV3) ? 0x00003ffffc000000UL : \
+ is_uv(UV2) ? 0x00003ffffc000000UL : \
+ 0)
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT ( \
+ is_uv(UV4) ? 26 : \
+ is_uv(UV3) ? 26 : \
+ is_uv(UV2) ? 26 : \
+ -1)
+
+union uvh_rh_gam_mmr_overlay_config_u {
unsigned long v;
- struct uvh_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UVH common struct */
+ struct uvh_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} s;
- struct uvxh_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UVXH common struct */
+ struct uvxh_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} sx;
- struct uv2h_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UV4 unique struct */
+ struct uv4h_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
- } s2;
- struct uv3h_rh_gam_mmr_overlay_config_mmr_s {
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} s3;
- struct uv4h_rh_gam_mmr_overlay_config_mmr_s {
+
+ /* UV2 unique struct */
+ struct uv2h_rh_gam_mmr_overlay_config_s {
unsigned long rsvd_0_25:26;
unsigned long base:20; /* RW */
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
- } s4;
+ } s2;
};
/* ========================================================================= */
/* UVH_RTC */
/* ========================================================================= */
-#define UV2H_RTC 0x340000UL
-#define UV3H_RTC 0x340000UL
-#define UV4H_RTC 0xe0000UL
#define UVH_RTC ( \
- is_uv2_hub() ? UV2H_RTC : \
- is_uv3_hub() ? UV3H_RTC : \
- /*is_uv4_hub*/ UV4H_RTC)
+ is_uv(UV5) ? 0xe0000UL : \
+ is_uv(UV4) ? 0xe0000UL : \
+ is_uv(UV3) ? 0x340000UL : \
+ is_uv(UV2) ? 0x340000UL : \
+ 0)
+/* UVH common defines*/
#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
union uvh_rtc_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_rtc_s {
unsigned long real_time_clock:56; /* RW */
unsigned long rsvd_56_63:8;
} s;
+
+ /* UV5 unique struct */
+ struct uv5h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s5;
+
+ /* UV4 unique struct */
+ struct uv4h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s4;
+
+ /* UV3 unique struct */
+ struct uv3h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s3;
+
+ /* UV2 unique struct */
+ struct uv2h_rtc_s {
+ unsigned long real_time_clock:56; /* RW */
+ unsigned long rsvd_56_63:8;
+ } s2;
};
/* ========================================================================= */
@@ -3487,26 +4428,29 @@ union uvh_rtc_u {
/* ========================================================================= */
#define UVH_RTC1_INT_CONFIG 0x615c0UL
+/* UVH common defines*/
#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
-#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
-#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
-#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
-#define UVH_RTC1_INT_CONFIG_P_SHFT 13
-#define UVH_RTC1_INT_CONFIG_T_SHFT 15
-#define UVH_RTC1_INT_CONFIG_M_SHFT 16
-#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
+#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL
+#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
+#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
+#define UVH_RTC1_INT_CONFIG_P_SHFT 13
#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL
+#define UVH_RTC1_INT_CONFIG_T_SHFT 15
#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL
+#define UVH_RTC1_INT_CONFIG_M_SHFT 16
#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL
+#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
union uvh_rtc1_int_config_u {
unsigned long v;
+
+ /* UVH common struct */
struct uvh_rtc1_int_config_s {
unsigned long vector_:8; /* RW */
unsigned long dm:3; /* RW */
@@ -3519,591 +4463,175 @@ union uvh_rtc1_int_config_u {
unsigned long rsvd_17_31:15;
unsigned long apic_id:32; /* RW */
} s;
-};
-
-/* ========================================================================= */
-/* UVH_SCRATCH5 */
-/* ========================================================================= */
-#define UV2H_SCRATCH5 0x2d0200UL
-#define UV3H_SCRATCH5 0x2d0200UL
-#define UV4H_SCRATCH5 0xb0200UL
-#define UVH_SCRATCH5 ( \
- is_uv2_hub() ? UV2H_SCRATCH5 : \
- is_uv3_hub() ? UV3H_SCRATCH5 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5)
-
-#define UV2H_SCRATCH5_32 0x778
-#define UV3H_SCRATCH5_32 0x778
-#define UV4H_SCRATCH5_32 0x798
-#define UVH_SCRATCH5_32 ( \
- is_uv2_hub() ? UV2H_SCRATCH5_32 : \
- is_uv3_hub() ? UV3H_SCRATCH5_32 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_32)
-
-#define UVH_SCRATCH5_SCRATCH5_SHFT 0
-#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-
-
-union uvh_scratch5_u {
- unsigned long v;
- struct uvh_scratch5_s {
- unsigned long scratch5:64; /* RW, W1CS */
- } s;
-};
-/* ========================================================================= */
-/* UVH_SCRATCH5_ALIAS */
-/* ========================================================================= */
-#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV4H_SCRATCH5_ALIAS 0xb0208UL
-#define UVH_SCRATCH5_ALIAS ( \
- is_uv2_hub() ? UV2H_SCRATCH5_ALIAS : \
- is_uv3_hub() ? UV3H_SCRATCH5_ALIAS : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS)
-
-#define UV2H_SCRATCH5_ALIAS_32 0x780
-#define UV3H_SCRATCH5_ALIAS_32 0x780
-#define UV4H_SCRATCH5_ALIAS_32 0x7a0
-#define UVH_SCRATCH5_ALIAS_32 ( \
- is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 : \
- is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32)
-
-
-/* ========================================================================= */
-/* UVH_SCRATCH5_ALIAS_2 */
-/* ========================================================================= */
-#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
-#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
-#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
-#define UVH_SCRATCH5_ALIAS_2 ( \
- is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 : \
- is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 : \
- /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2)
-#define UVH_SCRATCH5_ALIAS_2_32 0x788
-
-
-/* ========================================================================= */
-/* UVXH_EVENT_OCCURRED2 */
-/* ========================================================================= */
-#define UVXH_EVENT_OCCURRED2 0x70100UL
-
-#define UV2H_EVENT_OCCURRED2_32 0xb68
-#define UV3H_EVENT_OCCURRED2_32 0xb68
-#define UV4H_EVENT_OCCURRED2_32 0x608
-#define UVH_EVENT_OCCURRED2_32 ( \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED2_32 : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32)
-
-
-#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
-#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
-#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
-#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
-#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
-#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
-#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
-#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
-#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
-#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
-#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
-#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
-#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
-#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
-#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
-#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
-#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
-#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
-#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
-#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
-#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
-#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
-#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
-#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
-#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
-#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
-#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
-#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
-#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
-#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
-#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
-#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
-#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
-#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
-#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
-#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
-#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
-#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
-#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
-#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
-#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
-#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
-#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
-#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
-#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
-#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
-#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
-
-#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0
-#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1
-#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2
-#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3
-#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4
-#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5
-#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6
-#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7
-#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8
-#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9
-#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10
-#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11
-#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12
-#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13
-#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14
-#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15
-#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16
-#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17
-#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18
-#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19
-#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20
-#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21
-#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22
-#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23
-#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24
-#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25
-#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26
-#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27
-#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28
-#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29
-#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30
-#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31
-#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
-#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
-#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
-#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
-#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
-#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
-#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
-#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
-#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
-#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
-#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
-#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
-#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
-#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
-#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
-#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
-#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
-#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
-#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
-#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
-#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
-#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
-#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
-#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
-#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+ /* UV5 unique struct */
+ struct uv5h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s5;
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
-#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16
-#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17
-#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18
-#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19
-#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20
-#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21
-#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22
-#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23
-#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24
-#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25
-#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26
-#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27
-#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28
-#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29
-#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30
-#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31
-#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32
-#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33
-#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34
-#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35
-#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36
-#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37
-#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38
-#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39
-#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40
-#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41
-#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42
-#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43
-#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44
-#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45
-#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46
-#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47
-#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48
-#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
-#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL
-#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL
-#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL
-#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL
-#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL
-#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL
-#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL
-#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL
-#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL
+ /* UV4 unique struct */
+ struct uv4h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s4;
-#define UVXH_EVENT_OCCURRED2_RTC_1_MASK ( \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED2_RTC_1_MASK : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK)
+ /* UV3 unique struct */
+ struct uv3h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
+ } s3;
-union uvh_event_occurred2_u {
- unsigned long v;
- struct uv2h_event_occurred2_s {
- unsigned long rtc_0:1; /* RW */
- unsigned long rtc_1:1; /* RW */
- unsigned long rtc_2:1; /* RW */
- unsigned long rtc_3:1; /* RW */
- unsigned long rtc_4:1; /* RW */
- unsigned long rtc_5:1; /* RW */
- unsigned long rtc_6:1; /* RW */
- unsigned long rtc_7:1; /* RW */
- unsigned long rtc_8:1; /* RW */
- unsigned long rtc_9:1; /* RW */
- unsigned long rtc_10:1; /* RW */
- unsigned long rtc_11:1; /* RW */
- unsigned long rtc_12:1; /* RW */
- unsigned long rtc_13:1; /* RW */
- unsigned long rtc_14:1; /* RW */
- unsigned long rtc_15:1; /* RW */
- unsigned long rtc_16:1; /* RW */
- unsigned long rtc_17:1; /* RW */
- unsigned long rtc_18:1; /* RW */
- unsigned long rtc_19:1; /* RW */
- unsigned long rtc_20:1; /* RW */
- unsigned long rtc_21:1; /* RW */
- unsigned long rtc_22:1; /* RW */
- unsigned long rtc_23:1; /* RW */
- unsigned long rtc_24:1; /* RW */
- unsigned long rtc_25:1; /* RW */
- unsigned long rtc_26:1; /* RW */
- unsigned long rtc_27:1; /* RW */
- unsigned long rtc_28:1; /* RW */
- unsigned long rtc_29:1; /* RW */
- unsigned long rtc_30:1; /* RW */
- unsigned long rtc_31:1; /* RW */
- unsigned long rsvd_32_63:32;
+ /* UV2 unique struct */
+ struct uv2h_rtc1_int_config_s {
+ unsigned long vector_:8; /* RW */
+ unsigned long dm:3; /* RW */
+ unsigned long destmode:1; /* RW */
+ unsigned long status:1; /* RO */
+ unsigned long p:1; /* RO */
+ unsigned long rsvd_14:1;
+ unsigned long t:1; /* RO */
+ unsigned long m:1; /* RW */
+ unsigned long rsvd_17_31:15;
+ unsigned long apic_id:32; /* RW */
} s2;
- struct uv3h_event_occurred2_s {
- unsigned long rtc_0:1; /* RW */
- unsigned long rtc_1:1; /* RW */
- unsigned long rtc_2:1; /* RW */
- unsigned long rtc_3:1; /* RW */
- unsigned long rtc_4:1; /* RW */
- unsigned long rtc_5:1; /* RW */
- unsigned long rtc_6:1; /* RW */
- unsigned long rtc_7:1; /* RW */
- unsigned long rtc_8:1; /* RW */
- unsigned long rtc_9:1; /* RW */
- unsigned long rtc_10:1; /* RW */
- unsigned long rtc_11:1; /* RW */
- unsigned long rtc_12:1; /* RW */
- unsigned long rtc_13:1; /* RW */
- unsigned long rtc_14:1; /* RW */
- unsigned long rtc_15:1; /* RW */
- unsigned long rtc_16:1; /* RW */
- unsigned long rtc_17:1; /* RW */
- unsigned long rtc_18:1; /* RW */
- unsigned long rtc_19:1; /* RW */
- unsigned long rtc_20:1; /* RW */
- unsigned long rtc_21:1; /* RW */
- unsigned long rtc_22:1; /* RW */
- unsigned long rtc_23:1; /* RW */
- unsigned long rtc_24:1; /* RW */
- unsigned long rtc_25:1; /* RW */
- unsigned long rtc_26:1; /* RW */
- unsigned long rtc_27:1; /* RW */
- unsigned long rtc_28:1; /* RW */
- unsigned long rtc_29:1; /* RW */
- unsigned long rtc_30:1; /* RW */
- unsigned long rtc_31:1; /* RW */
- unsigned long rsvd_32_63:32;
- } s3;
- struct uv4h_event_occurred2_s {
- unsigned long message_accelerator_int0:1; /* RW */
- unsigned long message_accelerator_int1:1; /* RW */
- unsigned long message_accelerator_int2:1; /* RW */
- unsigned long message_accelerator_int3:1; /* RW */
- unsigned long message_accelerator_int4:1; /* RW */
- unsigned long message_accelerator_int5:1; /* RW */
- unsigned long message_accelerator_int6:1; /* RW */
- unsigned long message_accelerator_int7:1; /* RW */
- unsigned long message_accelerator_int8:1; /* RW */
- unsigned long message_accelerator_int9:1; /* RW */
- unsigned long message_accelerator_int10:1; /* RW */
- unsigned long message_accelerator_int11:1; /* RW */
- unsigned long message_accelerator_int12:1; /* RW */
- unsigned long message_accelerator_int13:1; /* RW */
- unsigned long message_accelerator_int14:1; /* RW */
- unsigned long message_accelerator_int15:1; /* RW */
- unsigned long rtc_interval_int:1; /* RW */
- unsigned long bau_dashboard_int:1; /* RW */
- unsigned long rtc_0:1; /* RW */
- unsigned long rtc_1:1; /* RW */
- unsigned long rtc_2:1; /* RW */
- unsigned long rtc_3:1; /* RW */
- unsigned long rtc_4:1; /* RW */
- unsigned long rtc_5:1; /* RW */
- unsigned long rtc_6:1; /* RW */
- unsigned long rtc_7:1; /* RW */
- unsigned long rtc_8:1; /* RW */
- unsigned long rtc_9:1; /* RW */
- unsigned long rtc_10:1; /* RW */
- unsigned long rtc_11:1; /* RW */
- unsigned long rtc_12:1; /* RW */
- unsigned long rtc_13:1; /* RW */
- unsigned long rtc_14:1; /* RW */
- unsigned long rtc_15:1; /* RW */
- unsigned long rtc_16:1; /* RW */
- unsigned long rtc_17:1; /* RW */
- unsigned long rtc_18:1; /* RW */
- unsigned long rtc_19:1; /* RW */
- unsigned long rtc_20:1; /* RW */
- unsigned long rtc_21:1; /* RW */
- unsigned long rtc_22:1; /* RW */
- unsigned long rtc_23:1; /* RW */
- unsigned long rtc_24:1; /* RW */
- unsigned long rtc_25:1; /* RW */
- unsigned long rtc_26:1; /* RW */
- unsigned long rtc_27:1; /* RW */
- unsigned long rtc_28:1; /* RW */
- unsigned long rtc_29:1; /* RW */
- unsigned long rtc_30:1; /* RW */
- unsigned long rtc_31:1; /* RW */
- unsigned long rsvd_50_63:14;
- } s4;
};
/* ========================================================================= */
-/* UVXH_EVENT_OCCURRED2_ALIAS */
+/* UVH_SCRATCH5 */
/* ========================================================================= */
-#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
-
-#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70
-#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70
-#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610
-#define UVH_EVENT_OCCURRED2_ALIAS_32 ( \
- is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 : \
- is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 : \
- /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32)
+#define UVH_SCRATCH5 ( \
+ is_uv(UV5) ? 0xb0200UL : \
+ is_uv(UV4) ? 0xb0200UL : \
+ is_uv(UV3) ? 0x2d0200UL : \
+ is_uv(UV2) ? 0x2d0200UL : \
+ 0)
+#define UV5H_SCRATCH5 0xb0200UL
+#define UV4H_SCRATCH5 0xb0200UL
+#define UV3H_SCRATCH5 0x2d0200UL
+#define UV2H_SCRATCH5 0x2d0200UL
+/* UVH common defines*/
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-/* ========================================================================= */
-/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2)
+/* UVXH common defines */
+#define UVXH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVXH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 ( \
- is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \
- is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \
- /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32)
+/* UVYH common defines */
+#define UVYH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVYH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UV5 unique defines */
+#define UV5H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV5H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UV4 unique defines */
+#define UV4H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV4H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UV3 unique defines */
+#define UV3H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV3H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+/* UV2 unique defines */
+#define UV2H_SCRATCH5_SCRATCH5_SHFT 0
+#define UV2H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
-union uvxh_lb_bau_sb_activation_status_2_u {
+union uvh_scratch5_u {
unsigned long v;
- struct uvxh_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } sx;
- struct uv2h_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } s2;
- struct uv3h_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } s3;
- struct uv4h_lb_bau_sb_activation_status_2_s {
- unsigned long aux_error:64; /* RW */
- } s4;
-};
-/* ========================================================================= */
-/* UV3H_GR0_GAM_GR_CONFIG */
-/* ========================================================================= */
-#define UV3H_GR0_GAM_GR_CONFIG 0xc00028UL
-
-#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT 0
-#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT 10
-#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL
-#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
+ /* UVH common struct */
+ struct uvh_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s;
-union uv3h_gr0_gam_gr_config_u {
- unsigned long v;
- struct uv3h_gr0_gam_gr_config_s {
- unsigned long m_skt:6; /* RW */
- unsigned long undef_6_9:4; /* Undefined */
- unsigned long subspace:1; /* RW */
- unsigned long reserved:53;
- } s3;
-};
+ /* UVXH common struct */
+ struct uvxh_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } sx;
-/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_QUEUE_FIRST */
-/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL
+ /* UVYH common struct */
+ struct uvyh_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } sy;
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL
+ /* UV5 unique struct */
+ struct uv5h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s5;
-union uv4h_lb_proc_intd_queue_first_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_queue_first_s {
- unsigned long undef_0_5:6; /* Undefined */
- unsigned long first_payload_address:40; /* RW */
+ /* UV4 unique struct */
+ struct uv4h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
} s4;
-};
-/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_QUEUE_LAST */
-/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_QUEUE_LAST 0xa4108UL
-
-#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5
-#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL
+ /* UV3 unique struct */
+ struct uv3h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s3;
-union uv4h_lb_proc_intd_queue_last_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_queue_last_s {
- unsigned long undef_0_4:5; /* Undefined */
- unsigned long last_payload_address:41; /* RW */
- } s4;
+ /* UV2 unique struct */
+ struct uv2h_scratch5_s {
+ unsigned long scratch5:64; /* RW */
+ } s2;
};
/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR */
+/* UVH_SCRATCH5_ALIAS */
/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR 0xa4118UL
+#define UVH_SCRATCH5_ALIAS ( \
+ is_uv(UV5) ? 0xb0208UL : \
+ is_uv(UV4) ? 0xb0208UL : \
+ is_uv(UV3) ? 0x2d0208UL : \
+ is_uv(UV2) ? 0x2d0208UL : \
+ 0)
+#define UV5H_SCRATCH5_ALIAS 0xb0208UL
+#define UV4H_SCRATCH5_ALIAS 0xb0208UL
+#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL
-
-union uv4h_lb_proc_intd_soft_ack_clear_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_soft_ack_clear_s {
- unsigned long soft_ack_pending_flags:8; /* WP */
- } s4;
-};
/* ========================================================================= */
-/* UV4H_LB_PROC_INTD_SOFT_ACK_PENDING */
+/* UVH_SCRATCH5_ALIAS_2 */
/* ========================================================================= */
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING 0xa4110UL
-
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL
+#define UVH_SCRATCH5_ALIAS_2 ( \
+ is_uv(UV5) ? 0xb0210UL : \
+ is_uv(UV4) ? 0xb0210UL : \
+ is_uv(UV3) ? 0x2d0210UL : \
+ is_uv(UV2) ? 0x2d0210UL : \
+ 0)
+#define UV5H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
-union uv4h_lb_proc_intd_soft_ack_pending_u {
- unsigned long v;
- struct uv4h_lb_proc_intd_soft_ack_pending_s {
- unsigned long soft_ack_flags:8; /* RW */
- } s4;
-};
#endif /* _ASM_X86_UV_UV_MMRS_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cd7de4b401fe..f8ba5289ecb0 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -52,7 +52,7 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES)
#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT)
#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING)
-#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP)
+#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP)
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC)
#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID)
#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING)
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 6807153c0410..dde5b3f1e7cd 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -4,29 +4,22 @@
#include <asm/bootparam.h>
+struct ghcb;
struct mpc_bus;
struct mpc_cpu;
+struct pt_regs;
struct mpc_table;
struct cpuinfo_x86;
+struct irq_domain;
/**
* struct x86_init_mpparse - platform specific mpparse ops
- * @mpc_record: platform specific mpc record accounting
* @setup_ioapic_ids: platform specific ioapic id override
- * @mpc_apic_id: platform specific mpc apic id assignment
- * @smp_read_mpc_oem: platform specific oem mpc table setup
- * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL)
- * @mpc_oem_bus_info: platform specific mpc bus info
* @find_smp_config: find the smp configuration
* @get_smp_config: get the smp configuration
*/
struct x86_init_mpparse {
- void (*mpc_record)(unsigned int mode);
void (*setup_ioapic_ids)(void);
- int (*mpc_apic_id)(struct mpc_cpu *m);
- void (*smp_read_mpc_oem)(struct mpc_table *mpc);
- void (*mpc_oem_pci_bus)(struct mpc_bus *m);
- void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
void (*find_smp_config)(void);
void (*get_smp_config)(unsigned int early);
};
@@ -52,12 +45,14 @@ struct x86_init_resources {
* @intr_init: interrupt init code
* @intr_mode_select: interrupt delivery mode selection
* @intr_mode_init: interrupt delivery mode setup
+ * @create_pci_msi_domain: Create the PCI/MSI interrupt domain
*/
struct x86_init_irqs {
void (*pre_vector_init)(void);
void (*intr_init)(void);
void (*intr_mode_select)(void);
void (*intr_mode_init)(void);
+ struct irq_domain *(*create_pci_msi_domain)(void);
};
/**
@@ -236,10 +231,22 @@ struct x86_legacy_features {
/**
* struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
*
- * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely)
+ * @pin_vcpu: pin current vcpu to specified physical
+ * cpu (run rarely)
+ * @sev_es_hcall_prepare: Load additional hypervisor-specific
+ * state into the GHCB when doing a VMMCALL under
+ * SEV-ES. Called from the #VC exception handler.
+ * @sev_es_hcall_finish: Copies state from the GHCB back into the
+ * processor (or pt_regs). Also runs checks on the
+ * state returned from the hypervisor after a
+ * VMMCALL under SEV-ES. Needs to return 'false'
+ * if the checks fail. Called from the #VC
+ * exception handler.
*/
struct x86_hyper_runtime {
void (*pin_vcpu)(int cpu);
+ void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs);
+ bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs);
};
/**
@@ -283,9 +290,6 @@ struct x86_platform_ops {
struct pci_dev;
struct x86_msi_ops {
- int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
- void (*teardown_msi_irq)(unsigned int irq);
- void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev);
};
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0780f97c1850..89e5f3d1bba8 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -192,6 +192,26 @@ struct kvm_msr_list {
__u32 indices[0];
};
+/* Maximum size of any access bitmap in bytes */
+#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600
+
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range {
+#define KVM_MSR_FILTER_READ (1 << 0)
+#define KVM_MSR_FILTER_WRITE (1 << 1)
+ __u32 flags;
+ __u32 nmsrs; /* number of msrs in bitmap */
+ __u32 base; /* MSR index the bitmap starts at */
+ __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+};
+
+#define KVM_MSR_FILTER_MAX_RANGES 16
+struct kvm_msr_filter {
+#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
+ __u32 flags;
+ struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
struct kvm_cpuid_entry {
__u32 function;
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 812e9b4c1114..950afebfba88 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -32,6 +32,7 @@
#define KVM_FEATURE_POLL_CONTROL 12
#define KVM_FEATURE_PV_SCHED_YIELD 13
#define KVM_FEATURE_ASYNC_PF_INT 14
+#define KVM_FEATURE_MSI_EXT_DEST_ID 15
#define KVM_HINTS_REALTIME 0
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 2e8a30f06c74..f1d8307454e0 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -29,6 +29,7 @@
#define SVM_EXIT_WRITE_DR6 0x036
#define SVM_EXIT_WRITE_DR7 0x037
#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_LAST_EXCP 0x05f
#define SVM_EXIT_INTR 0x060
#define SVM_EXIT_NMI 0x061
#define SVM_EXIT_SMI 0x062
@@ -76,10 +77,21 @@
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
#define SVM_EXIT_RDPRU 0x08e
+#define SVM_EXIT_INVPCID 0x0a2
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
+/* SEV-ES software-defined VMGEXIT events */
+#define SVM_VMGEXIT_MMIO_READ 0x80000001
+#define SVM_VMGEXIT_MMIO_WRITE 0x80000002
+#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003
+#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004
+#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
+#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
+#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
+#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
+
#define SVM_EXIT_ERR -1
#define SVM_EXIT_REASONS \
@@ -171,6 +183,7 @@
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT, "mwait" }, \
{ SVM_EXIT_XSETBV, "xsetbv" }, \
+ { SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e77261db2391..68608bd892c0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -20,6 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
+CFLAGS_REMOVE_sev-es.o = -pg
endif
KASAN_SANITIZE_head$(BITS).o := n
@@ -27,6 +28,7 @@ KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
+KASAN_SANITIZE_sev-es.o := n
# With some compiler versions the generated code results in boot hangs, caused
# by several compilation units. To be safe, disable all instrumentation.
@@ -45,6 +47,8 @@ endif
# non-deterministic coverage.
KCOV_INSTRUMENT := n
+CFLAGS_head$(BITS).o += -fno-stack-protector
+
CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
obj-y := process_$(BITS).o signal.o
@@ -68,6 +72,7 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
obj-y += irqflags.o
+obj-y += static_call.o
obj-y += process.o
obj-y += fpu/
@@ -145,6 +150,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index cdaab30880b9..2400ad62f330 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -807,6 +807,15 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
temp_mm_state_t temp_state;
lockdep_assert_irqs_disabled();
+
+ /*
+ * Make sure not to be in TLB lazy mode, as otherwise we'll end up
+ * with a stale address space WITHOUT being in lazy mode after
+ * restoring the previous mm.
+ */
+ if (this_cpu_read(cpu_tlbstate.is_lazy))
+ leave_mm(smp_processor_id());
+
temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
switch_mm_irqs_off(NULL, mm, current);
@@ -1103,6 +1112,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
*/
goto out_put;
+ case RET_INSN_OPCODE:
+ int3_emulate_ret(regs);
+ break;
+
case CALL_INSN_OPCODE:
int3_emulate_call(regs, (long)ip + tp->rel32);
break;
@@ -1277,6 +1290,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
switch (tp->opcode) {
case INT3_INSN_OPCODE:
+ case RET_INSN_OPCODE:
break;
case CALL_INSN_OPCODE:
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e89031e9c847..9ac696487b13 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -32,6 +32,7 @@
#include <linux/gfp.h>
#include <linux/atomic.h>
#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
#include <asm/mtrr.h>
#include <asm/proto.h>
#include <asm/iommu.h>
@@ -96,8 +97,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,
base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
PAGE_SIZE) >> PAGE_SHIFT;
- boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
- PAGE_SIZE) >> PAGE_SHIFT;
+ boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT);
spin_lock_irqsave(&iommu_bitmap_lock, flags);
offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
@@ -468,7 +468,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
{
void *vaddr;
- vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs);
+ vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
if (!vaddr ||
!force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
return vaddr;
@@ -480,7 +480,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
goto out_free;
return vaddr;
out_free:
- dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
return NULL;
}
@@ -490,7 +490,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
- dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
static int no_agp;
@@ -678,6 +678,8 @@ static const struct dma_map_ops gart_dma_ops = {
.get_sgtable = dma_common_get_sgtable,
.dma_supported = dma_direct_supported,
.get_required_mask = dma_direct_get_required_mask,
+ .alloc_pages = dma_direct_alloc_pages,
+ .free_pages = dma_direct_free_pages,
};
static void gart_iommu_shutdown(void)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5f943b938167..b3eef1d5c903 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1429,6 +1429,9 @@ void __init apic_intr_mode_init(void)
break;
}
+ if (x86_platform.apic_post_init)
+ x86_platform.apic_post_init();
+
apic_bsp_setup(upmode);
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 779a89e31c4c..7b3c7e0d4a09 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -860,10 +860,10 @@ void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node,
{
init_irq_alloc_info(info, NULL);
info->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
- info->ioapic_node = node;
- info->ioapic_trigger = trigger;
- info->ioapic_polarity = polarity;
- info->ioapic_valid = 1;
+ info->ioapic.node = node;
+ info->ioapic.trigger = trigger;
+ info->ioapic.polarity = polarity;
+ info->ioapic.valid = 1;
}
#ifndef CONFIG_ACPI
@@ -878,32 +878,32 @@ static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst,
copy_irq_alloc_info(dst, src);
dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
- dst->ioapic_id = mpc_ioapic_id(ioapic_idx);
- dst->ioapic_pin = pin;
- dst->ioapic_valid = 1;
- if (src && src->ioapic_valid) {
- dst->ioapic_node = src->ioapic_node;
- dst->ioapic_trigger = src->ioapic_trigger;
- dst->ioapic_polarity = src->ioapic_polarity;
+ dst->devid = mpc_ioapic_id(ioapic_idx);
+ dst->ioapic.pin = pin;
+ dst->ioapic.valid = 1;
+ if (src && src->ioapic.valid) {
+ dst->ioapic.node = src->ioapic.node;
+ dst->ioapic.trigger = src->ioapic.trigger;
+ dst->ioapic.polarity = src->ioapic.polarity;
} else {
- dst->ioapic_node = NUMA_NO_NODE;
+ dst->ioapic.node = NUMA_NO_NODE;
if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) {
- dst->ioapic_trigger = trigger;
- dst->ioapic_polarity = polarity;
+ dst->ioapic.trigger = trigger;
+ dst->ioapic.polarity = polarity;
} else {
/*
* PCI interrupts are always active low level
* triggered.
*/
- dst->ioapic_trigger = IOAPIC_LEVEL;
- dst->ioapic_polarity = IOAPIC_POL_LOW;
+ dst->ioapic.trigger = IOAPIC_LEVEL;
+ dst->ioapic.polarity = IOAPIC_POL_LOW;
}
}
}
static int ioapic_alloc_attr_node(struct irq_alloc_info *info)
{
- return (info && info->ioapic_valid) ? info->ioapic_node : NUMA_NO_NODE;
+ return (info && info->ioapic.valid) ? info->ioapic.node : NUMA_NO_NODE;
}
static void mp_register_handler(unsigned int irq, unsigned long trigger)
@@ -933,14 +933,14 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
* pin with real trigger and polarity attributes.
*/
if (irq < nr_legacy_irqs() && data->count == 1) {
- if (info->ioapic_trigger != data->trigger)
- mp_register_handler(irq, info->ioapic_trigger);
- data->entry.trigger = data->trigger = info->ioapic_trigger;
- data->entry.polarity = data->polarity = info->ioapic_polarity;
+ if (info->ioapic.trigger != data->trigger)
+ mp_register_handler(irq, info->ioapic.trigger);
+ data->entry.trigger = data->trigger = info->ioapic.trigger;
+ data->entry.polarity = data->polarity = info->ioapic.polarity;
}
- return data->trigger == info->ioapic_trigger &&
- data->polarity == info->ioapic_polarity;
+ return data->trigger == info->ioapic.trigger &&
+ data->polarity == info->ioapic.polarity;
}
static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
@@ -1002,7 +1002,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
if (!mp_check_pin_attr(irq, info))
return -EBUSY;
if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic,
- info->ioapic_pin))
+ info->ioapic.pin))
return -ENOMEM;
} else {
info->flags |= X86_IRQ_ALLOC_LEGACY;
@@ -2092,8 +2092,8 @@ static int mp_alloc_timer_irq(int ioapic, int pin)
struct irq_alloc_info info;
ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
- info.ioapic_id = mpc_ioapic_id(ioapic);
- info.ioapic_pin = pin;
+ info.devid = mpc_ioapic_id(ioapic);
+ info.ioapic.pin = pin;
mutex_lock(&ioapic_mutex);
irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info);
mutex_unlock(&ioapic_mutex);
@@ -2243,6 +2243,7 @@ static inline void __init check_timer(void)
legacy_pic->init(0);
legacy_pic->make_irq(0);
apic_write(APIC_LVT0, APIC_DM_EXTINT);
+ legacy_pic->unmask(0);
unlock_ExtINT_logic();
@@ -2296,9 +2297,9 @@ static int mp_irqdomain_create(int ioapic)
return 0;
init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_IOAPIC;
- info.ioapic_id = mpc_ioapic_id(ioapic);
- parent = irq_remapping_get_ir_irq_domain(&info);
+ info.type = X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT;
+ info.devid = mpc_ioapic_id(ioapic);
+ parent = irq_remapping_get_irq_domain(&info);
if (!parent)
parent = x86_vector_domain;
else
@@ -2932,9 +2933,9 @@ int mp_ioapic_registered(u32 gsi_base)
static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data,
struct irq_alloc_info *info)
{
- if (info && info->ioapic_valid) {
- data->trigger = info->ioapic_trigger;
- data->polarity = info->ioapic_polarity;
+ if (info && info->ioapic.valid) {
+ data->trigger = info->ioapic.trigger;
+ data->polarity = info->ioapic.polarity;
} else if (acpi_get_override_irq(gsi, &data->trigger,
&data->polarity) < 0) {
/* PCI interrupts are always active low level triggered. */
@@ -2980,7 +2981,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
return -EINVAL;
ioapic = mp_irqdomain_ioapic_idx(domain);
- pin = info->ioapic_pin;
+ pin = info->ioapic.pin;
if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0)
return -EEXIST;
@@ -2988,7 +2989,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
if (!data)
return -ENOMEM;
- info->ioapic_entry = &data->entry;
+ info->ioapic.entry = &data->entry;
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
if (ret < 0) {
kfree(data);
@@ -2996,7 +2997,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
}
INIT_LIST_HEAD(&data->irq_2_pin);
- irq_data->hwirq = info->ioapic_pin;
+ irq_data->hwirq = info->ioapic.pin;
irq_data->chip = (domain->parent == x86_vector_domain) ?
&ioapic_chip : &ioapic_ir_chip;
irq_data->chip_data = data;
@@ -3006,8 +3007,8 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
local_irq_save(flags);
- if (info->ioapic_entry)
- mp_setup_entry(cfg, data, info->ioapic_entry);
+ if (info->ioapic.entry)
+ mp_setup_entry(cfg, data, info->ioapic.entry);
mp_register_handler(virq, data->trigger);
if (virq < nr_legacy_irqs())
legacy_pic->mask(virq);
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index c2b2911feeef..6313f0a05db7 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -21,7 +21,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
-static struct irq_domain *msi_default_domain;
+struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
{
@@ -45,7 +45,7 @@ static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
MSI_DATA_VECTOR(cfg->vector);
}
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+void x86_vector_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
__irq_msi_compose_msg(irqd_cfg(data), msg);
}
@@ -177,40 +177,10 @@ static struct irq_chip pci_msi_controller = {
.irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
- .irq_compose_msi_msg = irq_msi_compose_msg,
.irq_set_affinity = msi_set_affinity,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
- struct irq_domain *domain;
- struct irq_alloc_info info;
-
- init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_MSI;
- info.msi_dev = dev;
-
- domain = irq_remapping_get_irq_domain(&info);
- if (domain == NULL)
- domain = msi_default_domain;
- if (domain == NULL)
- return -ENOSYS;
-
- return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
-}
-
-void native_teardown_msi_irq(unsigned int irq)
-{
- irq_domain_free_irqs(irq, 1);
-}
-
-static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info,
- msi_alloc_info_t *arg)
-{
- return arg->msi_hwirq;
-}
-
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
msi_alloc_info_t *arg)
{
@@ -218,11 +188,10 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
struct msi_desc *desc = first_pci_msi_entry(pdev);
init_irq_alloc_info(arg, NULL);
- arg->msi_dev = pdev;
if (desc->msi_attrib.is_msix) {
- arg->type = X86_IRQ_ALLOC_TYPE_MSIX;
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
} else {
- arg->type = X86_IRQ_ALLOC_TYPE_MSI;
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
}
@@ -230,16 +199,8 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
}
EXPORT_SYMBOL_GPL(pci_msi_prepare);
-void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
-{
- arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc);
-}
-EXPORT_SYMBOL_GPL(pci_msi_set_desc);
-
static struct msi_domain_ops pci_msi_domain_ops = {
- .get_hwirq = pci_msi_get_hwirq,
.msi_prepare = pci_msi_prepare,
- .set_desc = pci_msi_set_desc,
};
static struct msi_domain_info pci_msi_domain_info = {
@@ -251,25 +212,32 @@ static struct msi_domain_info pci_msi_domain_info = {
.handler_name = "edge",
};
-void __init arch_init_msi_domain(struct irq_domain *parent)
+struct irq_domain * __init native_create_pci_msi_domain(void)
{
struct fwnode_handle *fn;
+ struct irq_domain *d;
if (disable_apic)
- return;
+ return NULL;
fn = irq_domain_alloc_named_fwnode("PCI-MSI");
- if (fn) {
- msi_default_domain =
- pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
- parent);
- }
- if (!msi_default_domain) {
+ if (!fn)
+ return NULL;
+
+ d = pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
+ x86_vector_domain);
+ if (!d) {
irq_domain_free_fwnode(fn);
- pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
+ pr_warn("Failed to initialize PCI-MSI irqdomain.\n");
} else {
- msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
+ d->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
}
+ return d;
+}
+
+void __init x86_create_pci_msi_domain(void)
+{
+ x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain();
}
#ifdef CONFIG_IRQ_REMAP
@@ -279,7 +247,6 @@ static struct irq_chip pci_msi_ir_controller = {
.irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
- .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -321,35 +288,28 @@ static struct irq_chip dmar_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
- .irq_compose_msi_msg = irq_msi_compose_msg,
.irq_write_msi_msg = dmar_msi_write_msg,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info,
- msi_alloc_info_t *arg)
-{
- return arg->dmar_id;
-}
-
static int dmar_msi_init(struct irq_domain *domain,
struct msi_domain_info *info, unsigned int virq,
irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
- irq_domain_set_info(domain, virq, arg->dmar_id, info->chip, NULL,
- handle_edge_irq, arg->dmar_data, "edge");
+ irq_domain_set_info(domain, virq, arg->devid, info->chip, NULL,
+ handle_edge_irq, arg->data, "edge");
return 0;
}
static struct msi_domain_ops dmar_msi_domain_ops = {
- .get_hwirq = dmar_msi_get_hwirq,
.msi_init = dmar_msi_init,
};
static struct msi_domain_info dmar_msi_domain_info = {
.ops = &dmar_msi_domain_ops,
.chip = &dmar_msi_controller,
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS,
};
static struct irq_domain *dmar_get_irq_domain(void)
@@ -384,8 +344,9 @@ int dmar_alloc_hwirq(int id, int node, void *arg)
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_DMAR;
- info.dmar_id = id;
- info.dmar_data = arg;
+ info.devid = id;
+ info.hwirq = id;
+ info.data = arg;
return irq_domain_alloc_irqs(domain, 1, node, &info);
}
@@ -419,24 +380,17 @@ static struct irq_chip hpet_msi_controller __ro_after_init = {
.irq_ack = irq_chip_ack_parent,
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
- .irq_compose_msi_msg = irq_msi_compose_msg,
.irq_write_msi_msg = hpet_msi_write_msg,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info,
- msi_alloc_info_t *arg)
-{
- return arg->hpet_index;
-}
-
static int hpet_msi_init(struct irq_domain *domain,
struct msi_domain_info *info, unsigned int virq,
irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
- irq_domain_set_info(domain, virq, arg->hpet_index, info->chip, NULL,
- handle_edge_irq, arg->hpet_data, "edge");
+ irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL,
+ handle_edge_irq, arg->data, "edge");
return 0;
}
@@ -448,7 +402,6 @@ static void hpet_msi_free(struct irq_domain *domain,
}
static struct msi_domain_ops hpet_msi_domain_ops = {
- .get_hwirq = hpet_msi_get_hwirq,
.msi_init = hpet_msi_init,
.msi_free = hpet_msi_free,
};
@@ -456,6 +409,7 @@ static struct msi_domain_ops hpet_msi_domain_ops = {
static struct msi_domain_info hpet_msi_domain_info = {
.ops = &hpet_msi_domain_ops,
.chip = &hpet_msi_controller,
+ .flags = MSI_FLAG_USE_DEF_DOM_OPS,
};
struct irq_domain *hpet_create_irq_domain(int hpet_id)
@@ -476,9 +430,9 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
domain_info->data = (void *)(long)hpet_id;
init_irq_alloc_info(&info, NULL);
- info.type = X86_IRQ_ALLOC_TYPE_HPET;
- info.hpet_id = hpet_id;
- parent = irq_remapping_get_ir_irq_domain(&info);
+ info.type = X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT;
+ info.devid = hpet_id;
+ parent = irq_remapping_get_irq_domain(&info);
if (parent == NULL)
parent = x86_vector_domain;
else
@@ -506,9 +460,9 @@ int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_HPET;
- info.hpet_data = hc;
- info.hpet_id = hpet_dev_id(domain);
- info.hpet_index = dev_num;
+ info.data = hc;
+ info.devid = hpet_dev_id(domain);
+ info.hwirq = dev_num;
return irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
}
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99ee61c9ba54..67b6f7c049ec 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -170,9 +170,6 @@ void __init default_setup_apic_routing(void)
if (apic->setup_apic_routing)
apic->setup_apic_routing();
-
- if (x86_platform.apic_post_init)
- x86_platform.apic_post_init();
}
void __init generic_apic_probe(void)
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index bd3835d6b535..c46720f185c0 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -32,9 +32,6 @@ void __init default_setup_apic_routing(void)
break;
}
}
-
- if (x86_platform.apic_post_init)
- x86_platform.apic_post_init();
}
int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f8a56b5dc29f..1eac53632786 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -714,8 +714,6 @@ int __init arch_early_irq_init(void)
BUG_ON(x86_vector_domain == NULL);
irq_set_default_host(x86_vector_domain);
- arch_init_msi_domain(x86_vector_domain);
-
BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
/*
@@ -824,6 +822,7 @@ static struct irq_chip lapic_controller = {
.name = "APIC",
.irq_ack = apic_ack_edge,
.irq_set_affinity = apic_set_affinity,
+ .irq_compose_msi_msg = x86_vector_msi_compose_msg,
.irq_retrigger = apic_retrigger_irq,
};
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0b6eea3f54e6..714233cee0b5 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,6 +5,7 @@
*
* SGI UV APIC functions (note: not an Intel compatible APIC)
*
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/crash_dump.h>
@@ -29,19 +30,24 @@ static int uv_hubbed_system;
static int uv_hubless_system;
static u64 gru_start_paddr, gru_end_paddr;
static union uvh_apicid uvh_apicid;
+static int uv_node_id;
-/* Unpack OEM/TABLE ID's to be NULL terminated strings */
+/* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */
+static u8 uv_archtype[UV_AT_SIZE];
static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
-/* Information derived from CPUID: */
+/* Information derived from CPUID and some UV MMRs */
static struct {
unsigned int apicid_shift;
unsigned int apicid_mask;
unsigned int socketid_shift; /* aka pnode_shift for UV2/3 */
unsigned int pnode_mask;
+ unsigned int nasid_shift;
unsigned int gpa_shift;
unsigned int gnode_shift;
+ unsigned int m_skt;
+ unsigned int n_skt;
} uv_cpuid;
static int uv_min_hub_revision_id;
@@ -77,6 +83,9 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
static inline bool is_GRU_range(u64 start, u64 end)
{
+ if (!gru_start_paddr)
+ return false;
+
return start >= gru_start_paddr && end <= gru_end_paddr;
}
@@ -85,43 +94,102 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
return is_ISA_range(start, end) || is_GRU_range(start, end);
}
-static int __init early_get_pnodeid(void)
+static void __init early_get_pnodeid(void)
{
- union uvh_node_id_u node_id;
- union uvh_rh_gam_config_mmr_u m_n_config;
int pnode;
- /* Currently, all blades have same revision number */
+ uv_cpuid.m_skt = 0;
+ if (UVH_RH10_GAM_ADDR_MAP_CONFIG) {
+ union uvh_rh10_gam_addr_map_config_u m_n_config;
+
+ m_n_config.v = uv_early_read_mmr(UVH_RH10_GAM_ADDR_MAP_CONFIG);
+ uv_cpuid.n_skt = m_n_config.s.n_skt;
+ uv_cpuid.nasid_shift = 0;
+ } else if (UVH_RH_GAM_ADDR_MAP_CONFIG) {
+ union uvh_rh_gam_addr_map_config_u m_n_config;
+
+ m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
+ uv_cpuid.n_skt = m_n_config.s.n_skt;
+ if (is_uv(UV3))
+ uv_cpuid.m_skt = m_n_config.s3.m_skt;
+ if (is_uv(UV2))
+ uv_cpuid.m_skt = m_n_config.s2.m_skt;
+ uv_cpuid.nasid_shift = 1;
+ } else {
+ unsigned long GAM_ADDR_MAP_CONFIG = 0;
+
+ WARN(GAM_ADDR_MAP_CONFIG == 0,
+ "UV: WARN: GAM_ADDR_MAP_CONFIG is not available\n");
+ uv_cpuid.n_skt = 0;
+ uv_cpuid.nasid_shift = 0;
+ }
+
+ if (is_uv(UV4|UVY))
+ uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
+
+ uv_cpuid.pnode_mask = (1 << uv_cpuid.n_skt) - 1;
+ pnode = (uv_node_id >> uv_cpuid.nasid_shift) & uv_cpuid.pnode_mask;
+ uv_cpuid.gpa_shift = 46; /* Default unless changed */
+
+ pr_info("UV: n_skt:%d pnmsk:%x pn:%x\n",
+ uv_cpuid.n_skt, uv_cpuid.pnode_mask, pnode);
+}
+
+/* Running on a UV Hubbed system, determine which UV Hub Type it is */
+static int __init early_set_hub_type(void)
+{
+ union uvh_node_id_u node_id;
+
+ /*
+ * The NODE_ID MMR is always at offset 0.
+ * Contains the chip part # + revision.
+ * Node_id field started with 15 bits,
+ * ... now 7 but upper 8 are masked to 0.
+ * All blades/nodes have the same part # and hub revision.
+ */
node_id.v = uv_early_read_mmr(UVH_NODE_ID);
- m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
- uv_min_hub_revision_id = node_id.s.revision;
+ uv_node_id = node_id.sx.node_id;
switch (node_id.s.part_number) {
- case UV2_HUB_PART_NUMBER:
- case UV2_HUB_PART_NUMBER_X:
- uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+
+ case UV5_HUB_PART_NUMBER:
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV5_HUB_REVISION_BASE;
+ uv_hub_type_set(UV5);
break;
+
+ /* UV4/4A only have a revision difference */
+ case UV4_HUB_PART_NUMBER:
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV4_HUB_REVISION_BASE;
+ uv_hub_type_set(UV4);
+ if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE)
+ uv_hub_type_set(UV4|UV4A);
+ break;
+
case UV3_HUB_PART_NUMBER:
case UV3_HUB_PART_NUMBER_X:
- uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV3_HUB_REVISION_BASE;
+ uv_hub_type_set(UV3);
break;
- /* Update: UV4A has only a modified revision to indicate HUB fixes */
- case UV4_HUB_PART_NUMBER:
- uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
- uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
+ case UV2_HUB_PART_NUMBER:
+ case UV2_HUB_PART_NUMBER_X:
+ uv_min_hub_revision_id = node_id.s.revision
+ + UV2_HUB_REVISION_BASE - 1;
+ uv_hub_type_set(UV2);
break;
+
+ default:
+ return 0;
}
- uv_hub_info->hub_revision = uv_min_hub_revision_id;
- uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
- pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask;
- uv_cpuid.gpa_shift = 46; /* Default unless changed */
+ pr_info("UV: part#:%x rev:%d rev_id:%d UVtype:0x%x\n",
+ node_id.s.part_number, node_id.s.revision,
+ uv_min_hub_revision_id, is_uv(~0));
- pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n",
- node_id.s.revision, node_id.s.part_number, node_id.s.node_id,
- m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
- return pnode;
+ return 1;
}
static void __init uv_tsc_check_sync(void)
@@ -130,38 +198,41 @@ static void __init uv_tsc_check_sync(void)
int sync_state;
int mmr_shift;
char *state;
- bool valid;
- /* Accommodate different UV arch BIOSes */
+ /* Different returns from different UV BIOS versions */
mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
mmr_shift =
is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK;
+ /* Check if TSC is valid for all sockets */
switch (sync_state) {
case UVH_TSC_SYNC_VALID:
state = "in sync";
- valid = true;
+ mark_tsc_async_resets("UV BIOS");
break;
- case UVH_TSC_SYNC_INVALID:
- state = "unstable";
- valid = false;
+ /* If BIOS state unknown, don't do anything */
+ case UVH_TSC_SYNC_UNKNOWN:
+ state = "unknown";
break;
+
+ /* Otherwise, BIOS indicates problem with TSC */
default:
- state = "unknown: assuming valid";
- valid = true;
+ state = "unstable";
+ mark_tsc_unstable("UV BIOS");
break;
}
pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state);
-
- /* Mark flag that says TSC != 0 is valid for socket 0 */
- if (valid)
- mark_tsc_async_resets("UV BIOS");
- else
- mark_tsc_unstable("UV BIOS");
}
+/* Selector for (4|4A|5) structs */
+#define uvxy_field(sname, field, undef) ( \
+ is_uv(UV4A) ? sname.s4a.field : \
+ is_uv(UV4) ? sname.s4.field : \
+ is_uv(UV3) ? sname.s3.field : \
+ undef)
+
/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
#define SMT_LEVEL 0 /* Leaf 0xb SMT level */
@@ -221,29 +292,110 @@ static void __init uv_stringify(int len, char *to, char *from)
strncpy(to, from, len-1);
}
-static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
+/* Find UV arch type entry in UVsystab */
+static unsigned long __init early_find_archtype(struct uv_systab *st)
+{
+ int i;
+
+ for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+ unsigned long ptr = st->entry[i].offset;
+
+ if (!ptr)
+ continue;
+ ptr += (unsigned long)st;
+ if (st->entry[i].type == UV_SYSTAB_TYPE_ARCH_TYPE)
+ return ptr;
+ }
+ return 0;
+}
+
+/* Validate UV arch type field in UVsystab */
+static int __init decode_arch_type(unsigned long ptr)
+{
+ struct uv_arch_type_entry *uv_ate = (struct uv_arch_type_entry *)ptr;
+ int n = strlen(uv_ate->archtype);
+
+ if (n > 0 && n < sizeof(uv_ate->archtype)) {
+ pr_info("UV: UVarchtype received from BIOS\n");
+ uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype);
+ return 1;
+ }
+ return 0;
+}
+
+/* Determine if UV arch type entry might exist in UVsystab */
+static int __init early_get_arch_type(void)
{
- int pnodeid;
- int uv_apic;
+ unsigned long uvst_physaddr, uvst_size, ptr;
+ struct uv_systab *st;
+ u32 rev;
+ int ret;
+
+ uvst_physaddr = get_uv_systab_phys(0);
+ if (!uvst_physaddr)
+ return 0;
+
+ st = early_memremap_ro(uvst_physaddr, sizeof(struct uv_systab));
+ if (!st) {
+ pr_err("UV: Cannot access UVsystab, remap failed\n");
+ return 0;
+ }
+ rev = st->revision;
+ if (rev < UV_SYSTAB_VERSION_UV5) {
+ early_memunmap(st, sizeof(struct uv_systab));
+ return 0;
+ }
+
+ uvst_size = st->size;
+ early_memunmap(st, sizeof(struct uv_systab));
+ st = early_memremap_ro(uvst_physaddr, uvst_size);
+ if (!st) {
+ pr_err("UV: Cannot access UVarchtype, remap failed\n");
+ return 0;
+ }
+
+ ptr = early_find_archtype(st);
+ if (!ptr) {
+ early_memunmap(st, uvst_size);
+ return 0;
+ }
+
+ ret = decode_arch_type(ptr);
+ early_memunmap(st, uvst_size);
+ return ret;
+}
+
+static int __init uv_set_system_type(char *_oem_id)
+{
+ /* Save OEM_ID passed from ACPI MADT */
uv_stringify(sizeof(oem_id), oem_id, _oem_id);
- uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
- if (strncmp(oem_id, "SGI", 3) != 0) {
- if (strncmp(oem_id, "NSGI", 4) != 0)
+ /* Check if BIOS sent us a UVarchtype */
+ if (!early_get_arch_type())
+
+ /* If not use OEM ID for UVarchtype */
+ uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id);
+
+ /* Check if not hubbed */
+ if (strncmp(uv_archtype, "SGI", 3) != 0) {
+
+ /* (Not hubbed), check if not hubless */
+ if (strncmp(uv_archtype, "NSGI", 4) != 0)
+
+ /* (Not hubless), not a UV */
return 0;
- /* UV4 Hubless, CH, (0x11:UV4+Any) */
- if (strncmp(oem_id, "NSGI4", 5) == 0)
+ /* UV4 Hubless: CH */
+ if (strncmp(uv_archtype, "NSGI4", 5) == 0)
uv_hubless_system = 0x11;
- /* UV3 Hubless, UV300/MC990X w/o hub (0x9:UV3+Any) */
+ /* UV3 Hubless: UV300/MC990X w/o hub */
else
uv_hubless_system = 0x9;
- pr_info("UV: OEM IDs %s/%s, HUBLESS(0x%x)\n",
- oem_id, oem_table_id, uv_hubless_system);
-
+ pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n",
+ oem_id, oem_table_id, uv_system_type, uv_hubless_system);
return 0;
}
@@ -252,60 +404,83 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
return 0;
}
- /* Set up early hub type field in uv_hub_info for Node 0 */
- uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+ /* Set hubbed type if true */
+ uv_hub_info->hub_revision =
+ !strncmp(uv_archtype, "SGI5", 4) ? UV5_HUB_REVISION_BASE :
+ !strncmp(uv_archtype, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
+ !strncmp(uv_archtype, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+ !strcmp(uv_archtype, "SGI2") ? UV2_HUB_REVISION_BASE : 0;
+
+ switch (uv_hub_info->hub_revision) {
+ case UV5_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x21;
+ uv_hub_type_set(UV5);
+ break;
- /*
- * Determine UV arch type.
- * SGI2: UV2000/3000
- * SGI3: UV300 (truncated to 4 chars because of different varieties)
- * SGI4: UV400 (truncated to 4 chars because of different varieties)
- */
- if (!strncmp(oem_id, "SGI4", 4)) {
- uv_hub_info->hub_revision = UV4_HUB_REVISION_BASE;
+ case UV4_HUB_REVISION_BASE:
uv_hubbed_system = 0x11;
+ uv_hub_type_set(UV4);
+ break;
- } else if (!strncmp(oem_id, "SGI3", 4)) {
- uv_hub_info->hub_revision = UV3_HUB_REVISION_BASE;
+ case UV3_HUB_REVISION_BASE:
uv_hubbed_system = 0x9;
+ uv_hub_type_set(UV3);
+ break;
- } else if (!strcmp(oem_id, "SGI2")) {
- uv_hub_info->hub_revision = UV2_HUB_REVISION_BASE;
+ case UV2_HUB_REVISION_BASE:
uv_hubbed_system = 0x5;
+ uv_hub_type_set(UV2);
+ break;
- } else {
- uv_hub_info->hub_revision = 0;
- goto badbios;
+ default:
+ return 0;
}
- pnodeid = early_get_pnodeid();
- early_get_apic_socketid_shift();
+ /* Get UV hub chip part number & revision */
+ early_set_hub_type();
+ /* Other UV setup functions */
+ early_get_pnodeid();
+ early_get_apic_socketid_shift();
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
x86_platform.nmi_init = uv_nmi_init;
+ uv_tsc_check_sync();
+
+ return 1;
+}
+
+/* Called early to probe for the correct APIC driver */
+static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
+{
+ /* Set up early hub info fields for Node 0 */
+ uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+
+ /* If not UV, return. */
+ if (likely(uv_set_system_type(_oem_id) == 0))
+ return 0;
+
+ /* Save and Decode OEM Table ID */
+ uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
- if (!strcmp(oem_table_id, "UVX")) {
- /* This is the most common hardware variant: */
+ /* This is the most common hardware variant, x2apic mode */
+ if (!strcmp(oem_table_id, "UVX"))
uv_system_type = UV_X2APIC;
- uv_apic = 0;
- } else if (!strcmp(oem_table_id, "UVL")) {
- /* Only used for very small systems: */
+ /* Only used for very small systems, usually 1 chassis, legacy mode */
+ else if (!strcmp(oem_table_id, "UVL"))
uv_system_type = UV_LEGACY_APIC;
- uv_apic = 0;
- } else {
+ else
goto badbios;
- }
- pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic);
- uv_tsc_check_sync();
+ pr_info("UV: OEM IDs %s/%s, System/UVType %d/0x%x, HUB RevID %d\n",
+ oem_id, oem_table_id, uv_system_type, is_uv(UV_ANY),
+ uv_min_hub_revision_id);
- return uv_apic;
+ return 0;
badbios:
- pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
- pr_err("Current UV Type or BIOS not supported\n");
+ pr_err("UV: UVarchtype:%s not supported\n", uv_archtype);
BUG();
}
@@ -673,12 +848,12 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
};
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH 3
-#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+#define DEST_SHIFT UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT
static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
{
- union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
- union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+ union uvh_rh_gam_alias_2_overlay_config_u alias;
+ union uvh_rh_gam_alias_2_redirect_config_u redirect;
unsigned long m_redirect;
unsigned long m_overlay;
int i;
@@ -686,16 +861,16 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) {
switch (i) {
case 0:
- m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR;
- m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
+ m_redirect = UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG;
+ m_overlay = UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG;
break;
case 1:
- m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR;
- m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
+ m_redirect = UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG;
+ m_overlay = UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG;
break;
case 2:
- m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR;
- m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
+ m_redirect = UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG;
+ m_overlay = UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG;
break;
}
alias.v = uv_read_local_mmr(m_overlay);
@@ -710,6 +885,7 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
}
enum map_type {map_wb, map_uc};
+static const char * const mt[] = { "WB", "UC" };
static __init void map_high(char *id, unsigned long base, int pshift, int bshift, int max_pnode, enum map_type map_type)
{
@@ -721,23 +897,36 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift
pr_info("UV: Map %s_HI base address NULL\n", id);
return;
}
- pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
init_extra_mapping_wb(paddr, bytes);
+
+ pr_info("UV: Map %s_HI 0x%lx - 0x%lx %s (%d segments)\n",
+ id, paddr, paddr + bytes, mt[map_type], max_pnode + 1);
}
static __init void map_gru_high(int max_pnode)
{
- union uvh_rh_gam_gru_overlay_config_mmr_u gru;
- int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
- unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
- unsigned long base;
+ union uvh_rh_gam_gru_overlay_config_u gru;
+ unsigned long mask, base;
+ int shift;
+
+ if (UVH_RH_GAM_GRU_OVERLAY_CONFIG) {
+ gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
+ shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+ mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+ } else if (UVH_RH10_GAM_GRU_OVERLAY_CONFIG) {
+ gru.v = uv_read_local_mmr(UVH_RH10_GAM_GRU_OVERLAY_CONFIG);
+ shift = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+ mask = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+ } else {
+ pr_err("UV: GRU unavailable (no MMR)\n");
+ return;
+ }
- gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
if (!gru.s.enable) {
- pr_info("UV: GRU disabled\n");
+ pr_info("UV: GRU disabled (by BIOS)\n");
return;
}
@@ -749,62 +938,104 @@ static __init void map_gru_high(int max_pnode)
static __init void map_mmr_high(int max_pnode)
{
- union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
- int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ unsigned long base;
+ int shift;
+ bool enable;
+
+ if (UVH_RH10_GAM_MMR_OVERLAY_CONFIG) {
+ union uvh_rh10_gam_mmr_overlay_config_u mmr;
+
+ mmr.v = uv_read_local_mmr(UVH_RH10_GAM_MMR_OVERLAY_CONFIG);
+ enable = mmr.s.enable;
+ base = mmr.s.base;
+ shift = UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+ } else if (UVH_RH_GAM_MMR_OVERLAY_CONFIG) {
+ union uvh_rh_gam_mmr_overlay_config_u mmr;
+
+ mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG);
+ enable = mmr.s.enable;
+ base = mmr.s.base;
+ shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+ } else {
+ pr_err("UV:%s:RH_GAM_MMR_OVERLAY_CONFIG MMR undefined?\n",
+ __func__);
+ return;
+ }
- mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
- if (mmr.s.enable)
- map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+ if (enable)
+ map_high("MMR", base, shift, shift, max_pnode, map_uc);
else
pr_info("UV: MMR disabled\n");
}
-/* UV3/4 have identical MMIOH overlay configs, UV4A is slightly different */
-static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
-{
- unsigned long overlay;
- unsigned long mmr;
- unsigned long base;
- unsigned long nasid_mask;
- unsigned long m_overlay;
- int i, n, shift, m_io, max_io;
- int nasid, lnasid, fi, li;
- char *id;
-
- if (index == 0) {
- id = "MMIOH0";
- m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR;
- overlay = uv_read_local_mmr(m_overlay);
- base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK;
- mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR;
- m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
- >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
- shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
- n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
- nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK;
- } else {
- id = "MMIOH1";
- m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR;
- overlay = uv_read_local_mmr(m_overlay);
- base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK;
- mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR;
- m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
- >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
- shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
- n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH;
- nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK;
+/* Arch specific ENUM cases */
+enum mmioh_arch {
+ UV2_MMIOH = -1,
+ UVY_MMIOH0, UVY_MMIOH1,
+ UVX_MMIOH0, UVX_MMIOH1,
+};
+
+/* Calculate and Map MMIOH Regions */
+static void __init calc_mmioh_map(enum mmioh_arch index,
+ int min_pnode, int max_pnode,
+ int shift, unsigned long base, int m_io, int n_io)
+{
+ unsigned long mmr, nasid_mask;
+ int nasid, min_nasid, max_nasid, lnasid, mapped;
+ int i, fi, li, n, max_io;
+ char id[8];
+
+ /* One (UV2) mapping */
+ if (index == UV2_MMIOH) {
+ strncpy(id, "MMIOH", sizeof(id));
+ max_io = max_pnode;
+ mapped = 0;
+ goto map_exit;
}
- pr_info("UV: %s overlay 0x%lx base:0x%lx m_io:%d\n", id, overlay, base, m_io);
- if (!(overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)) {
- pr_info("UV: %s disabled\n", id);
+
+ /* small and large MMIOH mappings */
+ switch (index) {
+ case UVY_MMIOH0:
+ mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0;
+ nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+ n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
+ min_nasid = min_pnode;
+ max_nasid = max_pnode;
+ mapped = 1;
+ break;
+ case UVY_MMIOH1:
+ mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1;
+ nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+ n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
+ min_nasid = min_pnode;
+ max_nasid = max_pnode;
+ mapped = 1;
+ break;
+ case UVX_MMIOH0:
+ mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0;
+ nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+ n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
+ min_nasid = min_pnode * 2;
+ max_nasid = max_pnode * 2;
+ mapped = 1;
+ break;
+ case UVX_MMIOH1:
+ mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1;
+ nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+ n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
+ min_nasid = min_pnode * 2;
+ max_nasid = max_pnode * 2;
+ mapped = 1;
+ break;
+ default:
+ pr_err("UV:%s:Invalid mapping type:%d\n", __func__, index);
return;
}
- /* Convert to NASID: */
- min_pnode *= 2;
- max_pnode *= 2;
- max_io = lnasid = fi = li = -1;
+ /* enum values chosen so (index mod 2) is MMIOH 0/1 (low/high) */
+ snprintf(id, sizeof(id), "MMIOH%d", index%2);
+ max_io = lnasid = fi = li = -1;
for (i = 0; i < n; i++) {
unsigned long m_redirect = mmr + i * 8;
unsigned long redirect = uv_read_local_mmr(m_redirect);
@@ -814,9 +1045,12 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
pr_info("UV: %s redirect base 0x%lx(@0x%lx) 0x%04x\n",
id, redirect, m_redirect, nasid);
- /* Invalid NASID: */
- if (nasid < min_pnode || max_pnode < nasid)
+ /* Invalid NASID check */
+ if (nasid < min_nasid || max_nasid < nasid) {
+ pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n",
+ __func__, index, min_nasid, max_nasid);
nasid = -1;
+ }
if (nasid == lnasid) {
li = i;
@@ -839,7 +1073,8 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
}
addr1 = (base << shift) + f * (1ULL << m_io);
addr2 = (base << shift) + (l + 1) * (1ULL << m_io);
- pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", id, fi, li, lnasid, addr1, addr2);
+ pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
+ id, fi, li, lnasid, addr1, addr2);
if (max_io < l)
max_io = l;
}
@@ -847,49 +1082,93 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
lnasid = nasid;
}
- pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n", id, base, shift, m_io, max_io);
+map_exit:
+ pr_info("UV: %s base:0x%lx shift:%d m_io:%d max_io:%d max_pnode:0x%x\n",
+ id, base, shift, m_io, max_io, max_pnode);
- if (max_io >= 0)
+ if (max_io >= 0 && !mapped)
map_high(id, base, shift, m_io, max_io, map_uc);
}
static __init void map_mmioh_high(int min_pnode, int max_pnode)
{
- union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
- unsigned long mmr, base;
- int shift, enable, m_io, n_io;
+ /* UVY flavor */
+ if (UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0) {
+ union uvh_rh10_gam_mmioh_overlay_config0_u mmioh0;
+ union uvh_rh10_gam_mmioh_overlay_config1_u mmioh1;
+
+ mmioh0.v = uv_read_local_mmr(UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0);
+ if (unlikely(mmioh0.s.enable == 0))
+ pr_info("UV: MMIOH0 disabled\n");
+ else
+ calc_mmioh_map(UVY_MMIOH0, min_pnode, max_pnode,
+ UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT,
+ mmioh0.s.base, mmioh0.s.m_io, mmioh0.s.n_io);
- if (is_uv3_hub() || is_uv4_hub()) {
- /* Map both MMIOH regions: */
- map_mmioh_high_uv34(0, min_pnode, max_pnode);
- map_mmioh_high_uv34(1, min_pnode, max_pnode);
+ mmioh1.v = uv_read_local_mmr(UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1);
+ if (unlikely(mmioh1.s.enable == 0))
+ pr_info("UV: MMIOH1 disabled\n");
+ else
+ calc_mmioh_map(UVY_MMIOH1, min_pnode, max_pnode,
+ UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT,
+ mmioh1.s.base, mmioh1.s.m_io, mmioh1.s.n_io);
return;
}
+ /* UVX flavor */
+ if (UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0) {
+ union uvh_rh_gam_mmioh_overlay_config0_u mmioh0;
+ union uvh_rh_gam_mmioh_overlay_config1_u mmioh1;
+
+ mmioh0.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0);
+ if (unlikely(mmioh0.s.enable == 0))
+ pr_info("UV: MMIOH0 disabled\n");
+ else {
+ unsigned long base = uvxy_field(mmioh0, base, 0);
+ int m_io = uvxy_field(mmioh0, m_io, 0);
+ int n_io = uvxy_field(mmioh0, n_io, 0);
+
+ calc_mmioh_map(UVX_MMIOH0, min_pnode, max_pnode,
+ UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT,
+ base, m_io, n_io);
+ }
- if (is_uv2_hub()) {
- mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
- shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
- mmioh.v = uv_read_local_mmr(mmr);
- enable = !!mmioh.s2.enable;
- base = mmioh.s2.base;
- m_io = mmioh.s2.m_io;
- n_io = mmioh.s2.n_io;
-
- if (enable) {
- max_pnode &= (1 << n_io) - 1;
- pr_info("UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
- base, shift, m_io, n_io, max_pnode);
- map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
- } else {
- pr_info("UV: MMIOH disabled\n");
+ mmioh1.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1);
+ if (unlikely(mmioh1.s.enable == 0))
+ pr_info("UV: MMIOH1 disabled\n");
+ else {
+ unsigned long base = uvxy_field(mmioh1, base, 0);
+ int m_io = uvxy_field(mmioh1, m_io, 0);
+ int n_io = uvxy_field(mmioh1, n_io, 0);
+
+ calc_mmioh_map(UVX_MMIOH1, min_pnode, max_pnode,
+ UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT,
+ base, m_io, n_io);
}
+ return;
+ }
+
+ /* UV2 flavor */
+ if (UVH_RH_GAM_MMIOH_OVERLAY_CONFIG) {
+ union uvh_rh_gam_mmioh_overlay_config_u mmioh;
+
+ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG);
+ if (unlikely(mmioh.s2.enable == 0))
+ pr_info("UV: MMIOH disabled\n");
+ else
+ calc_mmioh_map(UV2_MMIOH, min_pnode, max_pnode,
+ UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT,
+ mmioh.s2.base, mmioh.s2.m_io, mmioh.s2.n_io);
+ return;
}
}
static __init void map_low_mmrs(void)
{
- init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
- init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+ if (UV_GLOBAL_MMR32_BASE)
+ init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+
+ if (UV_LOCAL_MMR_BASE)
+ init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
}
static __init void uv_rtc_init(void)
@@ -909,85 +1188,6 @@ static __init void uv_rtc_init(void)
}
}
-/*
- * percpu heartbeat timer
- */
-static void uv_heartbeat(struct timer_list *timer)
-{
- unsigned char bits = uv_scir_info->state;
-
- /* Flip heartbeat bit: */
- bits ^= SCIR_CPU_HEARTBEAT;
-
- /* Is this CPU idle? */
- if (idle_cpu(raw_smp_processor_id()))
- bits &= ~SCIR_CPU_ACTIVITY;
- else
- bits |= SCIR_CPU_ACTIVITY;
-
- /* Update system controller interface reg: */
- uv_set_scir_bits(bits);
-
- /* Enable next timer period: */
- mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
-}
-
-static int uv_heartbeat_enable(unsigned int cpu)
-{
- while (!uv_cpu_scir_info(cpu)->enabled) {
- struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
-
- uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
- timer_setup(timer, uv_heartbeat, TIMER_PINNED);
- timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
- add_timer_on(timer, cpu);
- uv_cpu_scir_info(cpu)->enabled = 1;
-
- /* Also ensure that boot CPU is enabled: */
- cpu = 0;
- }
- return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int uv_heartbeat_disable(unsigned int cpu)
-{
- if (uv_cpu_scir_info(cpu)->enabled) {
- uv_cpu_scir_info(cpu)->enabled = 0;
- del_timer(&uv_cpu_scir_info(cpu)->timer);
- }
- uv_set_cpu_scir_bits(cpu, 0xff);
- return 0;
-}
-
-static __init void uv_scir_register_cpu_notifier(void)
-{
- cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/x2apic-uvx:online",
- uv_heartbeat_enable, uv_heartbeat_disable);
-}
-
-#else /* !CONFIG_HOTPLUG_CPU */
-
-static __init void uv_scir_register_cpu_notifier(void)
-{
-}
-
-static __init int uv_init_heartbeat(void)
-{
- int cpu;
-
- if (is_uv_system()) {
- for_each_online_cpu(cpu)
- uv_heartbeat_enable(cpu);
- }
-
- return 0;
-}
-
-late_initcall(uv_init_heartbeat);
-
-#endif /* !CONFIG_HOTPLUG_CPU */
-
/* Direct Legacy VGA I/O traffic to designated IOH */
static int uv_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags)
{
@@ -1027,26 +1227,22 @@ struct mn {
unsigned char n_lshift;
};
+/* Initialize caller's MN struct and fill in values */
static void get_mn(struct mn *mnp)
{
- union uvh_rh_gam_config_mmr_u m_n_config;
- union uv3h_gr0_gam_gr_config_u m_gr_config;
-
- /* Make sure the whole structure is well initialized: */
memset(mnp, 0, sizeof(*mnp));
-
- m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
- mnp->n_val = m_n_config.s.n_skt;
-
- if (is_uv4_hub()) {
+ mnp->n_val = uv_cpuid.n_skt;
+ if (is_uv(UV4|UVY)) {
mnp->m_val = 0;
mnp->n_lshift = 0;
} else if (is_uv3_hub()) {
- mnp->m_val = m_n_config.s3.m_skt;
- m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
+ union uvyh_gr0_gam_gr_config_u m_gr_config;
+
+ mnp->m_val = uv_cpuid.m_skt;
+ m_gr_config.v = uv_read_local_mmr(UVH_GR0_GAM_GR_CONFIG);
mnp->n_lshift = m_gr_config.s3.m_skt;
} else if (is_uv2_hub()) {
- mnp->m_val = m_n_config.s2.m_skt;
+ mnp->m_val = uv_cpuid.m_skt;
mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
}
mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
@@ -1054,7 +1250,6 @@ static void get_mn(struct mn *mnp)
static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
{
- union uvh_node_id_u node_id;
struct mn mn;
get_mn(&mn);
@@ -1067,7 +1262,9 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
hi->m_shift = mn.m_shift;
hi->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
hi->hub_revision = uv_hub_info->hub_revision;
+ hi->hub_type = uv_hub_info->hub_type;
hi->pnode_mask = uv_cpuid.pnode_mask;
+ hi->nasid_shift = uv_cpuid.nasid_shift;
hi->min_pnode = _min_pnode;
hi->min_socket = _min_socket;
hi->pnode_to_socket = _pnode_to_socket;
@@ -1076,9 +1273,8 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
hi->gr_table_len = _gr_table_len;
hi->gr_table = _gr_table;
- node_id.v = uv_read_local_mmr(UVH_NODE_ID);
uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val);
- hi->gnode_extra = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1;
+ hi->gnode_extra = (uv_node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1;
if (mn.m_val)
hi->gnode_upper = (u64)hi->gnode_extra << mn.m_val;
@@ -1090,7 +1286,9 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
hi->gpa_shift = uv_gp_table->gpa_shift;
hi->gpa_mask = (1UL << hi->gpa_shift) - 1;
} else {
- hi->global_mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE;
+ hi->global_mmr_base =
+ uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG) &
+ ~UV_MMR_ENABLE;
hi->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
}
@@ -1101,7 +1299,11 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
/* Show system specific info: */
pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", hi->n_val, hi->m_val, hi->m_shift, hi->n_lshift);
pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", hi->gpa_mask, hi->gpa_shift, hi->pnode_mask, hi->apic_pnode_shift);
- pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n", hi->global_mmr_base, hi->global_mmr_shift, hi->global_gru_base, hi->global_gru_shift);
+ pr_info("UV: mmr_base/shift:0x%lx/%ld\n", hi->global_mmr_base, hi->global_mmr_shift);
+ if (hi->global_gru_base)
+ pr_info("UV: gru_base/shift:0x%lx/%ld\n",
+ hi->global_gru_base, hi->global_gru_shift);
+
pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", hi->gnode_upper, hi->gnode_extra);
}
@@ -1173,21 +1375,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode);
}
+/* Walk through UVsystab decoding the fields */
static int __init decode_uv_systab(void)
{
struct uv_systab *st;
int i;
- /* If system is uv3 or lower, there is no extended UVsystab */
- if (is_uv_hubbed(0xfffffe) < uv(4) && is_uv_hubless(0xfffffe) < uv(4))
- return 0; /* No extended UVsystab required */
-
+ /* Get mapped UVsystab pointer */
st = uv_systab;
+
+ /* If UVsystab is version 1, there is no extended UVsystab */
+ if (st && st->revision == UV_SYSTAB_VERSION_1)
+ return 0;
+
if ((!st) || (st->revision < UV_SYSTAB_VERSION_UV4_LATEST)) {
int rev = st ? st->revision : 0;
- pr_err("UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", rev, UV_SYSTAB_VERSION_UV4_LATEST);
- pr_err("UV: Cannot support UV operations, switching to generic PC\n");
+ pr_err("UV: BIOS UVsystab mismatch, (%x < %x)\n",
+ rev, UV_SYSTAB_VERSION_UV4_LATEST);
+ pr_err("UV: Does not support UV, switch to non-UV x86_64\n");
uv_system_type = UV_NONE;
return -EINVAL;
@@ -1199,7 +1405,8 @@ static int __init decode_uv_systab(void)
if (!ptr)
continue;
- ptr = ptr + (unsigned long)st;
+ /* point to payload */
+ ptr += (unsigned long)st;
switch (st->entry[i].type) {
case UV_SYSTAB_TYPE_GAM_PARAMS:
@@ -1209,32 +1416,49 @@ static int __init decode_uv_systab(void)
case UV_SYSTAB_TYPE_GAM_RNG_TBL:
decode_gam_rng_tbl(ptr);
break;
+
+ case UV_SYSTAB_TYPE_ARCH_TYPE:
+ /* already processed in early startup */
+ break;
+
+ default:
+ pr_err("UV:%s:Unrecognized UV_SYSTAB_TYPE:%d, skipped\n",
+ __func__, st->entry[i].type);
+ break;
}
}
return 0;
}
-/*
- * Set up physical blade translations from UVH_NODE_PRESENT_TABLE
- * .. NB: UVH_NODE_PRESENT_TABLE is going away,
- * .. being replaced by GAM Range Table
- */
+/* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */
static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
{
+ unsigned long np;
int i, uv_pb = 0;
- pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH);
- for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
- unsigned long np;
-
- np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
- if (np)
+ if (UVH_NODE_PRESENT_TABLE) {
+ pr_info("UV: NODE_PRESENT_DEPTH = %d\n",
+ UVH_NODE_PRESENT_TABLE_DEPTH);
+ for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+ np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
-
+ uv_pb += hweight64(np);
+ }
+ }
+ if (UVH_NODE_PRESENT_0) {
+ np = uv_read_local_mmr(UVH_NODE_PRESENT_0);
+ pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np);
+ uv_pb += hweight64(np);
+ }
+ if (UVH_NODE_PRESENT_1) {
+ np = uv_read_local_mmr(UVH_NODE_PRESENT_1);
+ pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np);
uv_pb += hweight64(np);
}
if (uv_possible_blades != uv_pb)
uv_possible_blades = uv_pb;
+
+ pr_info("UV: number nodes/possible blades %d\n", uv_pb);
}
static void __init build_socket_tables(void)
@@ -1253,7 +1477,7 @@ static void __init build_socket_tables(void)
pr_info("UV: No UVsystab socket table, ignoring\n");
return;
}
- pr_crit("UV: Error: UVsystab address translations not available!\n");
+ pr_err("UV: Error: UVsystab address translations not available!\n");
BUG();
}
@@ -1379,9 +1603,9 @@ static int __maybe_unused proc_hubless_show(struct seq_file *file, void *data)
return 0;
}
-static int __maybe_unused proc_oemid_show(struct seq_file *file, void *data)
+static int __maybe_unused proc_archtype_show(struct seq_file *file, void *data)
{
- seq_printf(file, "%s/%s\n", oem_id, oem_table_id);
+ seq_printf(file, "%s/%s\n", uv_archtype, oem_table_id);
return 0;
}
@@ -1390,7 +1614,7 @@ static __init void uv_setup_proc_files(int hubless)
struct proc_dir_entry *pde;
pde = proc_mkdir(UV_PROC_NODE, NULL);
- proc_create_single("oemid", 0, pde, proc_oemid_show);
+ proc_create_single("archtype", 0, pde, proc_archtype_show);
if (hubless)
proc_create_single("hubless", 0, pde, proc_hubless_show);
else
@@ -1429,7 +1653,8 @@ static void __init uv_system_init_hub(void)
struct uv_hub_info_s hub_info = {0};
int bytes, cpu, nodeid;
unsigned short min_pnode = 9999, max_pnode = 0;
- char *hub = is_uv4_hub() ? "UV400" :
+ char *hub = is_uv5_hub() ? "UV500" :
+ is_uv4_hub() ? "UV400" :
is_uv3_hub() ? "UV300" :
is_uv2_hub() ? "UV2000/3000" : NULL;
@@ -1441,12 +1666,14 @@ static void __init uv_system_init_hub(void)
map_low_mmrs();
- /* Get uv_systab for decoding: */
+ /* Get uv_systab for decoding, setup UV BIOS calls */
uv_bios_init();
/* If there's an UVsystab problem then abort UV init: */
- if (decode_uv_systab() < 0)
+ if (decode_uv_systab() < 0) {
+ pr_err("UV: Mangled UVsystab format\n");
return;
+ }
build_socket_tables();
build_uv_gr_table();
@@ -1517,8 +1744,6 @@ static void __init uv_system_init_hub(void)
uv_hub_info_list(numa_node_id)->pnode = pnode;
else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
uv_cpu_hub_info(cpu)->pnode = pnode;
-
- uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid);
}
for_each_node(nodeid) {
@@ -1547,7 +1772,6 @@ static void __init uv_system_init_hub(void)
uv_nmi_setup();
uv_cpu_init();
- uv_scir_register_cpu_notifier();
uv_setup_proc_files(0);
/* Register Legacy VGA I/O redirection handler: */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 3ca07ad552ae..70b7154f4bdd 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -38,9 +38,6 @@ static void __used common(void)
#endif
BLANK();
- OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
-
- BLANK();
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
BLANK();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index dcc3d943c68f..6062ce586b95 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -614,7 +614,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
* If BIOS has not enabled SME then don't advertise the
* SME feature (set in scattered.c).
* For SEV: If BIOS has not enabled SEV then don't advertise the
- * SEV feature (set in scattered.c).
+ * SEV and SEV_ES feature (set in scattered.c).
*
* In all cases, since support for SME and SEV requires long mode,
* don't advertise the feature under CONFIG_X86_32.
@@ -645,6 +645,7 @@ clear_all:
setup_clear_cpu_cap(X86_FEATURE_SME);
clear_sev:
setup_clear_cpu_cap(X86_FEATURE_SEV);
+ setup_clear_cpu_cap(X86_FEATURE_SEV_ES);
}
}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index c5cf336e5077..345f7d905db6 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -65,6 +65,9 @@ static void init_c3(struct cpuinfo_x86 *c)
c->x86_cache_alignment = c->x86_clflush_size * 2;
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
+
+ if (c->x86 >= 7)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
enum {
@@ -90,18 +93,15 @@ enum {
static void early_init_centaur(struct cpuinfo_x86 *c)
{
- switch (c->x86) {
#ifdef CONFIG_X86_32
- case 5:
- /* Emulate MTRRs using Centaur's MCR. */
+ /* Emulate MTRRs using Centaur's MCR. */
+ if (c->x86 == 5)
set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
- break;
#endif
- case 6:
- if (c->x86_model >= 0xf)
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
- break;
- }
+ if ((c->x86 == 6 && c->x86_model >= 0xf) ||
+ (c->x86 >= 7))
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
@@ -145,9 +145,8 @@ static void init_centaur(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
- switch (c->x86) {
#ifdef CONFIG_X86_32
- case 5:
+ if (c->x86 == 5) {
switch (c->x86_model) {
case 4:
name = "C6";
@@ -207,12 +206,10 @@ static void init_centaur(struct cpuinfo_x86 *c)
c->x86_cache_size = (cc>>24)+(dd>>24);
}
sprintf(c->x86_model_id, "WinChip %s", name);
- break;
+ }
#endif
- case 6:
+ if (c->x86 == 6 || c->x86 >= 7)
init_c3(c);
- break;
- }
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c5d6f17d9b9d..35ad8480c464 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -23,6 +23,7 @@
#include <linux/syscore_ops.h>
#include <linux/pgtable.h>
+#include <asm/cmdline.h>
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
@@ -359,7 +360,7 @@ void native_write_cr0(unsigned long val)
unsigned long bits_missing = 0;
set_register:
- asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
+ asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
@@ -378,7 +379,7 @@ void native_write_cr4(unsigned long val)
unsigned long bits_changed = 0;
set_register:
- asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
+ asm volatile("mov %0,%%cr4": "+r" (val) : : "memory");
if (static_branch_likely(&cr_pinning)) {
if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
@@ -1221,6 +1222,59 @@ static void detect_nopl(void)
}
/*
+ * We parse cpu parameters early because fpu__init_system() is executed
+ * before parse_early_param().
+ */
+static void __init cpu_parse_early_param(void)
+{
+ char arg[128];
+ char *argptr = arg;
+ int arglen, res, bit;
+
+#ifdef CONFIG_X86_32
+ if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
+ setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+ pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
+#endif
+
+ if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
+#endif
+
+ if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+ if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+ setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+ if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+ arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
+ if (arglen <= 0)
+ return;
+
+ pr_info("Clearing CPUID bits:");
+ do {
+ res = get_option(&argptr, &bit);
+ if (res == 0 || res == 3)
+ break;
+
+ /* If the argument was too long, the last bit may be cut off */
+ if (res == 1 && arglen >= sizeof(arg))
+ break;
+
+ if (bit >= 0 && bit < NCAPINTS * 32) {
+ pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+ setup_clear_cpu_cap(bit);
+ }
+ } while (res == 2);
+ pr_cont("\n");
+}
+
+/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
* cache alignment.
@@ -1255,6 +1309,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
get_cpu_cap(c);
get_cpu_address_sizes(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+ cpu_parse_early_param();
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
@@ -1413,15 +1468,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
* ESPFIX issue, we can change this.
*/
#ifdef CONFIG_X86_32
-# ifdef CONFIG_PARAVIRT_XXL
- do {
- extern void native_iret(void);
- if (pv_ops.cpu.iret == native_iret)
- set_cpu_bug(c, X86_BUG_ESPFIX);
- } while (0);
-# else
set_cpu_bug(c, X86_BUG_ESPFIX);
-# endif
#endif
}
@@ -1829,6 +1876,8 @@ static inline void tss_setup_ist(struct tss_struct *tss)
tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+ /* Only mapped when SEV-ES is active */
+ tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
}
#else /* CONFIG_X86_64 */
@@ -1861,6 +1910,29 @@ static inline void tss_setup_io_bitmap(struct tss_struct *tss)
}
/*
+ * Setup everything needed to handle exceptions from the IDT, including the IST
+ * exceptions which use paranoid_entry().
+ */
+void cpu_init_exception_handling(void)
+{
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+ int cpu = raw_smp_processor_id();
+
+ /* paranoid_entry() gets the CPU number from the GDT */
+ setup_getcpu(cpu);
+
+ /* IST vectors need TSS to be set up. */
+ tss_setup_ist(tss);
+ tss_setup_io_bitmap(tss);
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+
+ load_TR_desc();
+
+ /* Finally load the IDT */
+ load_current_idt();
+}
+
+/*
* cpu_init() initializes state that is per-CPU. Some data is already
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 9d033693519a..67944128876d 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -38,7 +38,7 @@ struct _tlb_table {
#define cpu_dev_register(cpu_devX) \
static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
- __attribute__((__section__(".x86_cpu_dev.init"))) = \
+ __section(".x86_cpu_dev.init") = \
&cpu_devX;
extern const struct cpu_dev *const __x86_cpu_dev_start[],
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 3cbe24ca80ab..d502241995a3 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -69,6 +69,8 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
{ X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
{ X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES },
+ { X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA },
{}
};
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 99be063fcb1b..0c6b02dd744c 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -132,49 +132,49 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
}
static struct smca_hwid smca_hwid_mcatypes[] = {
- /* { bank_type, hwid_mcatype, xec_bitmap } */
+ /* { bank_type, hwid_mcatype } */
/* Reserved type */
- { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
/* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
- { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
- { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
- { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
- { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
+ { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
+ { SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
+ { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
+ { SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
- { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
- { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
- { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
+ { SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
+ { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
/* Data Fabric MCA types */
- { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
- { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
+ { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
/* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
/* Parameter Block MCA type */
- { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
+ { SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
/* Platform Security Processor MCA type */
- { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
- { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
+ { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
/* System Management Unit MCA type */
- { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
- { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
+ { SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
/* Microprocessor 5 Unit MCA type */
- { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
+ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
/* Northbridge IO Unit MCA type */
- { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
+ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
/* PCI Express Unit MCA type */
- { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
+ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f43a78bde670..4102b866e7c0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
-#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/sync_core.h>
#include <linux/task_work.h>
@@ -373,42 +372,105 @@ static int msr_to_offset(u32 msr)
return -1;
}
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+
+ show_stack_regs(regs);
+
+ panic("MCA architectural violation!\n");
+
+ while (true)
+ cpu_relax();
+
+ return true;
+}
+
/* MSR access wrappers used for error injection */
-static u64 mce_rdmsrl(u32 msr)
+static noinstr u64 mce_rdmsrl(u32 msr)
{
- u64 v;
+ DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
- int offset = msr_to_offset(msr);
+ int offset;
+ u64 ret;
+
+ instrumentation_begin();
+ offset = msr_to_offset(msr);
if (offset < 0)
- return 0;
- return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
- }
+ ret = 0;
+ else
+ ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
- if (rdmsrl_safe(msr, &v)) {
- WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
- /*
- * Return zero in case the access faulted. This should
- * not happen normally but can happen if the CPU does
- * something weird, or if the code is buggy.
- */
- v = 0;
+ instrumentation_end();
+
+ return ret;
}
- return v;
+ /*
+ * RDMSR on MCA MSRs should not fault. If they do, this is very much an
+ * architectural violation and needs to be reported to hw vendor. Panic
+ * the box to not allow any further progress.
+ */
+ asm volatile("1: rdmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
+ : EAX_EDX_RET(val, low, high) : "c" (msr));
+
+
+ return EAX_EDX_VAL(val, low, high);
+}
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
+ regs->ip, (void *)regs->ip);
+
+ show_stack_regs(regs);
+
+ panic("MCA architectural violation!\n");
+
+ while (true)
+ cpu_relax();
+
+ return true;
}
-static void mce_wrmsrl(u32 msr, u64 v)
+static noinstr void mce_wrmsrl(u32 msr, u64 v)
{
+ u32 low, high;
+
if (__this_cpu_read(injectm.finished)) {
- int offset = msr_to_offset(msr);
+ int offset;
+
+ instrumentation_begin();
+ offset = msr_to_offset(msr);
if (offset >= 0)
*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
+
+ instrumentation_end();
+
return;
}
- wrmsrl(msr, v);
+
+ low = (u32)v;
+ high = (u32)(v >> 32);
+
+ /* See comment in mce_rdmsrl() */
+ asm volatile("1: wrmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
+ : : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
@@ -745,7 +807,7 @@ log_it:
goto clear_it;
mce_read_aux(&m, i);
- m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+ m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
@@ -794,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
- if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -872,7 +934,6 @@ static void mce_reign(void)
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
- char *nmsg = NULL;
/*
* This CPU is the Monarch and the other CPUs have run
@@ -880,12 +941,10 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
- int severity = mce_severity(&per_cpu(mces_seen, cpu),
- mca_cfg.tolerant,
- &nmsg, true);
- if (severity > global_worst) {
- msg = nmsg;
- global_worst = severity;
+ struct mce *mtmp = &per_cpu(mces_seen, cpu);
+
+ if (mtmp->severity > global_worst) {
+ global_worst = mtmp->severity;
m = &per_cpu(mces_seen, cpu);
}
}
@@ -895,8 +954,11 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
- if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+ if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+ /* call mce_severity() to get "msg" for panic */
+ mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
+ }
/*
* For UC somewhere we let the CPU who detects it handle it.
@@ -1105,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
-static void __mc_scan_banks(struct mce *m, struct mce *final,
+static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
@@ -1140,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- severity = mce_severity(m, cfg->tolerant, NULL, true);
+ severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
@@ -1188,13 +1250,34 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
+ !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+ sync_core();
return;
}
- pr_err("Memory error not recovered");
- kill_me_now(cb);
+ if (p->mce_vaddr != (void __user *)-1l) {
+ force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+ } else {
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+ }
+}
+
+static void queue_task_work(struct mce *m, int kill_it)
+{
+ current->mce_addr = m->addr;
+ current->mce_kflags = m->kflags;
+ current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(m);
+
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ else
+ current->mce_kill_me.func = kill_me_maybe;
+
+ task_work_add(current, &current->mce_kill_me, TWA_RESUME);
}
/*
@@ -1291,7 +1374,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
- __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
+ __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1313,7 +1396,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
- mce_severity(&m, cfg->tolerant, &msg, true);
+ mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
@@ -1330,25 +1413,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (worst > 0)
irq_work_queue(&mce_irq_work);
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-
- sync_core();
-
if (worst != MCE_AR_SEVERITY && !kill_it)
- return;
+ goto out;
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- current->mce_addr = m.addr;
- current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
- current->mce_whole_page = whole_page(&m);
- current->mce_kill_me.func = kill_me_maybe;
- if (kill_it)
- current->mce_kill_me.func = kill_me_now;
- task_work_add(current, &current->mce_kill_me, true);
+ queue_task_work(&m, kill_it);
+
} else {
/*
* Handle an MCE which has happened in kernel space but from
@@ -1363,7 +1437,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
+
+ if (m.kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&m, kill_it);
}
+out:
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
@@ -1904,6 +1983,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
{
+ bool irq_state;
+
WARN_ON_ONCE(user_mode(regs));
/*
@@ -1914,7 +1995,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
mce_check_crashing_cpu())
return;
- nmi_enter();
+ irq_state = idtentry_enter_nmi(regs);
/*
* The call targets are marked noinstr, but objtool can't figure
* that out because it's an indirect call. Annotate it.
@@ -1925,7 +2006,7 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
if (regs->flags & X86_EFLAGS_IF)
trace_hardirqs_on_prepare();
instrumentation_end();
- nmi_exit();
+ idtentry_exit_nmi(regs, irq_state);
}
static __always_inline void exc_machine_check_user(struct pt_regs *regs)
@@ -2062,7 +2143,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
- * mce=recovery force enable memcpy_mcsafe()
+ * mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
@@ -2670,13 +2751,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif
-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
-EXPORT_SYMBOL_GPL(mcsafe_key);
-
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();
mcheck_debugfs_init();
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 03e51053592a..100fbeebdc72 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -67,7 +67,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
unlock:
mutex_unlock(&mce_chrdev_read_mutex);
- mce->kflags |= MCE_HANDLED_MCELOG;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ mce->kflags |= MCE_HANDLED_MCELOG;
+
return NOTIFY_OK;
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 6473070b5da4..88dcc79cfb07 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -38,7 +38,8 @@ int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
-extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
@@ -185,4 +186,14 @@ extern bool amd_filter_mce(struct mce *m);
static inline bool amd_filter_mce(struct mce *m) { return false; };
#endif
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr);
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr);
+
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index e1da619add19..83df991314c5 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -9,9 +9,14 @@
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <asm/mce.h>
#include <linux/uaccess.h>
+#include <asm/mce.h>
+#include <asm/intel-family.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+
#include "internal.h"
/*
@@ -40,9 +45,14 @@ static struct severity {
unsigned char context;
unsigned char excp;
unsigned char covered;
+ unsigned char cpu_model;
+ unsigned char cpu_minstepping;
+ unsigned char bank_lo, bank_hi;
char *msg;
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
+#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
+#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
#define KERNEL .context = IN_KERNEL
#define USER .context = IN_USER
#define KERNEL_RECOV .context = IN_KERNEL_RECOV
@@ -90,14 +100,9 @@ static struct severity {
EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
- DEFERRED, "Deferred error",
- NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
- ),
- MCESEV(
KEEP, "Corrected error",
NOSER, BITCLR(MCI_STATUS_UC)
),
-
/*
* known AO MCACODs reported via MCE or CMC:
*
@@ -113,6 +118,18 @@ static struct severity {
AO, "Action optional: last level cache writeback error",
SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
),
+ /*
+ * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
+ * to report uncorrected errors using CMCI with a special signature.
+ * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
+ * in one of the memory controller banks.
+ * Set severity to "AO" for same action as normal patrol scrub error.
+ */
+ MCESEV(
+ AO, "Uncorrected Patrol Scrub Error",
+ SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
+ MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
+ ),
/* ignore OVER for UCNA */
MCESEV(
@@ -198,6 +215,47 @@ static struct severity {
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+static bool is_copy_from_user(struct pt_regs *regs)
+{
+ u8 insn_buf[MAX_INSN_SIZE];
+ struct insn insn;
+ unsigned long addr;
+
+ if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+ return false;
+
+ kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+ insn_get_opcode(&insn);
+ if (!insn.opcode.got)
+ return false;
+
+ switch (insn.opcode.value) {
+ /* MOV mem,reg */
+ case 0x8A: case 0x8B:
+ /* MOVZ mem,reg */
+ case 0xB60F: case 0xB70F:
+ insn_get_modrm(&insn);
+ insn_get_sib(&insn);
+ if (!insn.modrm.got || !insn.sib.got)
+ return false;
+ addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+ break;
+ /* REP MOVS */
+ case 0xA4: case 0xA5:
+ addr = regs->si;
+ break;
+ default:
+ return false;
+ }
+
+ if (fault_in_kernel_space(addr))
+ return false;
+
+ current->mce_vaddr = (void __user *)addr;
+
+ return true;
+}
+
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -209,15 +267,25 @@ static struct severity {
* distinguish an exception taken in user from from one
* taken in the kernel.
*/
-static int error_context(struct mce *m)
+static int error_context(struct mce *m, struct pt_regs *regs)
{
+ enum handler_type t;
+
if ((m->cs & 3) == 3)
return IN_USER;
+ if (!mc_recoverable(m->mcgstatus))
+ return IN_KERNEL;
- if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
+ t = ex_get_fault_handler_type(m->ip);
+ if (t == EX_HANDLER_FAULT) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
}
+ if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
+ m->kflags |= MCE_IN_KERNEL_RECOV;
+ m->kflags |= MCE_IN_KERNEL_COPYIN;
+ return IN_KERNEL_RECOV;
+ }
return IN_KERNEL;
}
@@ -253,9 +321,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
*/
-static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
+ char **msg, bool is_excp)
{
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
/* Processor Context Corrupt, no need to fumble too much, die! */
if (m->status & MCI_STATUS_PCC)
@@ -305,10 +374,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
return MCE_KEEP_SEVERITY;
}
-static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
struct severity *s;
for (s = severities;; s++) {
@@ -324,6 +394,12 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
continue;
if (s->excp && excp != s->excp)
continue;
+ if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
+ continue;
+ if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
+ continue;
+ if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
+ continue;
if (msg)
*msg = s->msg;
s->covered = 1;
@@ -336,7 +412,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
}
/* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
mce_severity_intel;
void __init mcheck_vendor_init_severity(void)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 31125448b174..05ef1f4550cb 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -55,9 +55,14 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
-void hv_setup_vmbus_irq(void (*handler)(void))
+int hv_setup_vmbus_irq(int irq, void (*handler)(void))
{
+ /*
+ * The 'irq' argument is ignored on x86/x64 because a hard-coded
+ * interrupt vector is used for Hyper-V interrupts.
+ */
vmbus_handler = handler;
+ return 0;
}
void hv_remove_vmbus_irq(void)
@@ -248,7 +253,7 @@ static void __init ms_hyperv_init_platform(void)
hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF);
}
- if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
x86_platform.calibrate_tsc = hv_get_tsc_khz;
x86_platform.calibrate_cpu = hv_get_tsc_khz;
@@ -270,7 +275,7 @@ static void __init ms_hyperv_init_platform(void)
crash_kexec_post_notifiers = true;
#ifdef CONFIG_X86_LOCAL_APIC
- if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
/*
* Get the APIC frequency.
@@ -296,7 +301,7 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
- if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
+ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
} else {
@@ -330,7 +335,7 @@ static void __init ms_hyperv_init_platform(void)
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_hyperv_callback);
/* Setup the IDT for reenlightenment notifications */
- if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) {
+ if (ms_hyperv.features & HV_ACCESS_REENLIGHTENMENT) {
alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
asm_sysvec_hyperv_reenlightenment);
}
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 6a9df71c1b9e..e5f4ee8f4c3b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "MB",
.domains = domain_init(RDT_RESOURCE_MBA),
.cache_level = 3,
+ .parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
@@ -254,22 +255,30 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
{
union cpuid_0x10_3_eax eax;
union cpuid_0x10_x_edx edx;
- u32 ebx, ecx;
+ u32 ebx, ecx, max_delay;
cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
r->num_closid = edx.split.cos_max + 1;
- r->membw.max_delay = eax.split.max_delay + 1;
+ max_delay = eax.split.max_delay + 1;
r->default_ctrl = MAX_MBA_BW;
+ r->membw.arch_needs_linear = true;
if (ecx & MBA_IS_LINEAR) {
r->membw.delay_linear = true;
- r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
- r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
+ r->membw.min_bw = MAX_MBA_BW - max_delay;
+ r->membw.bw_gran = MAX_MBA_BW - max_delay;
} else {
if (!rdt_get_mb_table(r))
return false;
+ r->membw.arch_needs_linear = false;
}
r->data_width = 3;
+ if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
+ r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
+ else
+ r->membw.throttle_mode = THREAD_THROTTLE_MAX;
+ thread_throttle_mode_init();
+
r->alloc_capable = true;
r->alloc_enabled = true;
@@ -288,7 +297,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
/* AMD does not use delay */
r->membw.delay_linear = false;
+ r->membw.arch_needs_linear = false;
+ /*
+ * AMD does not use memory delay throttle model to control
+ * the allocation like Intel does.
+ */
+ r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
r->membw.min_bw = 0;
r->membw.bw_gran = 1;
/* Max value is 2048, Data width should be 4 in decimal */
@@ -346,19 +361,6 @@ static void rdt_get_cdp_l2_config(void)
rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
}
-static int get_cache_id(int cpu, int level)
-{
- struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
- int i;
-
- for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == level)
- return ci->info_list[i].id;
- }
-
- return -1;
-}
-
static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
@@ -556,13 +558,13 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
*/
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
- int id = get_cache_id(cpu, r->cache_level);
+ int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
struct list_head *add_pos = NULL;
struct rdt_domain *d;
d = rdt_find_domain(r, id, &add_pos);
if (IS_ERR(d)) {
- pr_warn("Could't find cache id for cpu %d\n", cpu);
+ pr_warn("Couldn't find cache id for CPU %d\n", cpu);
return;
}
@@ -602,12 +604,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
- int id = get_cache_id(cpu, r->cache_level);
+ int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
struct rdt_domain *d;
d = rdt_find_domain(r, id, NULL);
if (IS_ERR_OR_NULL(d)) {
- pr_warn("Could't find cache id for cpu %d\n", cpu);
+ pr_warn("Couldn't find cache id for CPU %d\n", cpu);
return;
}
@@ -918,12 +920,12 @@ static __init void rdt_init_res_defs_intel(void)
r->rid == RDT_RESOURCE_L3CODE ||
r->rid == RDT_RESOURCE_L2 ||
r->rid == RDT_RESOURCE_L2DATA ||
- r->rid == RDT_RESOURCE_L2CODE)
- r->cbm_validate = cbm_validate_intel;
- else if (r->rid == RDT_RESOURCE_MBA) {
+ r->rid == RDT_RESOURCE_L2CODE) {
+ r->cache.arch_has_sparse_bitmaps = false;
+ r->cache.arch_has_empty_bitmaps = false;
+ } else if (r->rid == RDT_RESOURCE_MBA) {
r->msr_base = MSR_IA32_MBA_THRTL_BASE;
r->msr_update = mba_wrmsr_intel;
- r->parse_ctrlval = parse_bw_intel;
}
}
}
@@ -938,12 +940,12 @@ static __init void rdt_init_res_defs_amd(void)
r->rid == RDT_RESOURCE_L3CODE ||
r->rid == RDT_RESOURCE_L2 ||
r->rid == RDT_RESOURCE_L2DATA ||
- r->rid == RDT_RESOURCE_L2CODE)
- r->cbm_validate = cbm_validate_amd;
- else if (r->rid == RDT_RESOURCE_MBA) {
+ r->rid == RDT_RESOURCE_L2CODE) {
+ r->cache.arch_has_sparse_bitmaps = true;
+ r->cache.arch_has_empty_bitmaps = true;
+ } else if (r->rid == RDT_RESOURCE_MBA) {
r->msr_base = MSR_IA32_MBA_BW_BASE;
r->msr_update = mba_wrmsr_amd;
- r->parse_ctrlval = parse_bw_amd;
}
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 934c8fb8a64a..c877642e8a14 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -23,53 +23,6 @@
/*
* Check whether MBA bandwidth percentage value is correct. The value is
- * checked against the minimum and maximum bandwidth values specified by
- * the hardware. The allocated bandwidth percentage is rounded to the next
- * control step available on the hardware.
- */
-static bool bw_validate_amd(char *buf, unsigned long *data,
- struct rdt_resource *r)
-{
- unsigned long bw;
- int ret;
-
- ret = kstrtoul(buf, 10, &bw);
- if (ret) {
- rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
- return false;
- }
-
- if (bw < r->membw.min_bw || bw > r->default_ctrl) {
- rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
- r->membw.min_bw, r->default_ctrl);
- return false;
- }
-
- *data = roundup(bw, (unsigned long)r->membw.bw_gran);
- return true;
-}
-
-int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
- struct rdt_domain *d)
-{
- unsigned long bw_val;
-
- if (d->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
- return -EINVAL;
- }
-
- if (!bw_validate_amd(data->buf, &bw_val, r))
- return -EINVAL;
-
- d->new_ctrl = bw_val;
- d->have_new_ctrl = true;
-
- return 0;
-}
-
-/*
- * Check whether MBA bandwidth percentage value is correct. The value is
* checked against the minimum and max bandwidth values specified by the
* hardware. The allocated bandwidth percentage is rounded to the next
* control step available on the hardware.
@@ -82,7 +35,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
/*
* Only linear delay values is supported for current Intel SKUs.
*/
- if (!r->membw.delay_linear) {
+ if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
rdt_last_cmd_puts("No support for non-linear MB domains\n");
return false;
}
@@ -104,8 +57,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
-int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
- struct rdt_domain *d)
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
unsigned long bw_val;
@@ -123,12 +76,14 @@ int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
}
/*
- * Check whether a cache bit mask is valid. The SDM says:
+ * Check whether a cache bit mask is valid.
+ * For Intel the SDM says:
* Please note that all (and only) contiguous '1' combinations
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
* Additionally Haswell requires at least two bits set.
+ * AMD allows non-contiguous bitmasks.
*/
-bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
+static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
unsigned long first_bit, zero_bit, val;
unsigned int cbm_len = r->cache.cbm_len;
@@ -140,7 +95,8 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
return false;
}
- if (val == 0 || val > r->default_ctrl) {
+ if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
+ val > r->default_ctrl) {
rdt_last_cmd_puts("Mask out of range\n");
return false;
}
@@ -148,7 +104,9 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
first_bit = find_first_bit(&val, cbm_len);
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
- if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
+ /* Are non-contiguous bitmaps allowed? */
+ if (!r->cache.arch_has_sparse_bitmaps &&
+ (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
return false;
}
@@ -164,30 +122,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
}
/*
- * Check whether a cache bit mask is valid. AMD allows non-contiguous
- * bitmasks
- */
-bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
-{
- unsigned long val;
- int ret;
-
- ret = kstrtoul(buf, 16, &val);
- if (ret) {
- rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
- return false;
- }
-
- if (val > r->default_ctrl) {
- rdt_last_cmd_puts("Mask out of range\n");
- return false;
- }
-
- *data = val;
- return true;
-}
-
-/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
@@ -212,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
return -EINVAL;
}
- if (!r->cbm_validate(data->buf, &cbm_val, r))
+ if (!cbm_validate(data->buf, &cbm_val, r))
return -EINVAL;
if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5ffa32256b3b..80fa997fae60 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -283,7 +283,6 @@ struct rftype {
* struct mbm_state - status for each MBM counter in each domain
* @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes)
* @prev_msr Value of IA32_QM_CTR for this RMID last time we read it
- * @chunks_bw Total local data moved. Used for bandwidth calculation
* @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
* @prev_bw The most recent bandwidth in MBps
* @delta_bw Difference between the current and previous bandwidth
@@ -292,7 +291,6 @@ struct rftype {
struct mbm_state {
u64 chunks;
u64 prev_msr;
- u64 chunks_bw;
u64 prev_bw_msr;
u32 prev_bw;
u32 delta_bw;
@@ -360,6 +358,8 @@ struct msr_param {
* in a cache bit mask
* @shareable_bits: Bitmask of shareable resource with other
* executing entities
+ * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid.
+ * @arch_has_empty_bitmaps: True if the '0' bitmap is valid.
*/
struct rdt_cache {
unsigned int cbm_len;
@@ -367,25 +367,43 @@ struct rdt_cache {
unsigned int cbm_idx_mult;
unsigned int cbm_idx_offset;
unsigned int shareable_bits;
+ bool arch_has_sparse_bitmaps;
+ bool arch_has_empty_bitmaps;
+};
+
+/**
+ * enum membw_throttle_mode - System's memory bandwidth throttling mode
+ * @THREAD_THROTTLE_UNDEFINED: Not relevant to the system
+ * @THREAD_THROTTLE_MAX: Memory bandwidth is throttled at the core
+ * always using smallest bandwidth percentage
+ * assigned to threads, aka "max throttling"
+ * @THREAD_THROTTLE_PER_THREAD: Memory bandwidth is throttled at the thread
+ */
+enum membw_throttle_mode {
+ THREAD_THROTTLE_UNDEFINED = 0,
+ THREAD_THROTTLE_MAX,
+ THREAD_THROTTLE_PER_THREAD,
};
/**
* struct rdt_membw - Memory bandwidth allocation related data
- * @max_delay: Max throttle delay. Delay is the hardware
- * representation for memory bandwidth.
* @min_bw: Minimum memory bandwidth percentage user can request
* @bw_gran: Granularity at which the memory bandwidth is allocated
* @delay_linear: True if memory B/W delay is in linear scale
+ * @arch_needs_linear: True if we can't configure non-linear resources
+ * @throttle_mode: Bandwidth throttling mode when threads request
+ * different memory bandwidths
* @mba_sc: True if MBA software controller(mba_sc) is enabled
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct rdt_membw {
- u32 max_delay;
- u32 min_bw;
- u32 bw_gran;
- u32 delay_linear;
- bool mba_sc;
- u32 *mb_map;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ bool arch_needs_linear;
+ enum membw_throttle_mode throttle_mode;
+ bool mba_sc;
+ u32 *mb_map;
};
static inline bool is_llc_occupancy_enabled(void)
@@ -437,7 +455,6 @@ struct rdt_parse_data {
* @cache: Cache allocation related data
* @format_str: Per resource format string to show domain value
* @parse_ctrlval: Per resource function pointer to parse control values
- * @cbm_validate Cache bitmask validate function
* @evt_list: List of monitoring events
* @num_rmid: Number of RMIDs available
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
@@ -464,7 +481,6 @@ struct rdt_resource {
int (*parse_ctrlval)(struct rdt_parse_data *data,
struct rdt_resource *r,
struct rdt_domain *d);
- bool (*cbm_validate)(char *buf, u32 *data, struct rdt_resource *r);
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
@@ -474,10 +490,8 @@ struct rdt_resource {
int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d);
-int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
- struct rdt_domain *d);
-int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
- struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
@@ -609,8 +623,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
-bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
-bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
+void __init thread_throttle_mode_init(void);
#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 837d7d012b7b..54dffe574e67 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -279,8 +279,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
return;
chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
- m->chunks_bw += chunks;
- m->chunks = m->chunks_bw;
+ m->chunks += chunks;
cur_bw = (chunks * r->mon_scale) >> 20;
if (m->delta_comp)
@@ -478,19 +477,13 @@ void cqm_handle_limbo(struct work_struct *work)
mutex_lock(&rdtgroup_mutex);
r = &rdt_resources_all[RDT_RESOURCE_L3];
- d = get_domain_from_cpu(cpu, r);
-
- if (!d) {
- pr_warn_once("Failure to get domain for limbo worker\n");
- goto out_unlock;
- }
+ d = container_of(work, struct rdt_domain, cqm_limbo.work);
__check_limbo(d, false);
if (has_busy_rmid(r, d))
schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
-out_unlock:
mutex_unlock(&rdtgroup_mutex);
}
@@ -520,10 +513,7 @@ void mbm_handle_overflow(struct work_struct *work)
goto out_unlock;
r = &rdt_resources_all[RDT_RESOURCE_L3];
-
- d = get_domain_from_cpu(cpu, r);
- if (!d)
- goto out_unlock;
+ d = container_of(work, struct rdt_domain, mbm_over.work);
list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
mbm_update(r, d, prgrp->mon.rmid);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 3f844f14fc0a..af323e2e3100 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -561,7 +561,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
* callback has been invoked.
*/
atomic_inc(&rdtgrp->waitcount);
- ret = task_work_add(tsk, &callback->work, true);
+ ret = task_work_add(tsk, &callback->work, TWA_RESUME);
if (ret) {
/*
* Task is exiting. Drop the refcount and free the callback.
@@ -592,6 +592,18 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
return ret;
}
+static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
+{
+ return (rdt_alloc_capable &&
+ (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
+}
+
+static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
+{
+ return (rdt_mon_capable &&
+ (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
+}
+
/**
* rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
* @r: Resource group
@@ -607,8 +619,7 @@ int rdtgroup_tasks_assigned(struct rdtgroup *r)
rcu_read_lock();
for_each_process_thread(p, t) {
- if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
- (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
+ if (is_closid_match(t, r) || is_rmid_match(t, r)) {
ret = 1;
break;
}
@@ -706,8 +717,7 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
rcu_read_lock();
for_each_process_thread(p, t) {
- if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
- (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
+ if (is_closid_match(t, r) || is_rmid_match(t, r))
seq_printf(s, "%d\n", t->pid);
}
rcu_read_unlock();
@@ -1017,6 +1027,19 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
return 0;
}
+static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
+ seq_puts(seq, "per-thread\n");
+ else
+ seq_puts(seq, "max\n");
+
+ return 0;
+}
+
static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
@@ -1513,6 +1536,17 @@ static struct rftype res_common_files[] = {
.seq_show = rdt_delay_linear_show,
.fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
},
+ /*
+ * Platform specific which (if any) capabilities are provided by
+ * thread_throttle_mode. Defer "fflags" initialization to platform
+ * discovery.
+ */
+ {
+ .name = "thread_throttle_mode",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_thread_throttle_mode_show,
+ },
{
.name = "max_threshold_occupancy",
.mode = 0644,
@@ -1583,7 +1617,7 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
lockdep_assert_held(&rdtgroup_mutex);
for (rft = rfts; rft < rfts + len; rft++) {
- if ((fflags & rft->fflags) == rft->fflags) {
+ if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
ret = rdtgroup_add_file(kn, rft);
if (ret)
goto error;
@@ -1600,6 +1634,33 @@ error:
return ret;
}
+static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
+{
+ struct rftype *rfts, *rft;
+ int len;
+
+ rfts = res_common_files;
+ len = ARRAY_SIZE(res_common_files);
+
+ for (rft = rfts; rft < rfts + len; rft++) {
+ if (!strcmp(rft->name, name))
+ return rft;
+ }
+
+ return NULL;
+}
+
+void __init thread_throttle_mode_init(void)
+{
+ struct rftype *rft;
+
+ rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
+ if (!rft)
+ return;
+
+ rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
+}
+
/**
* rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
* @r: The resource group with which the file is associated.
@@ -2245,18 +2306,6 @@ static int reset_all_ctrls(struct rdt_resource *r)
return 0;
}
-static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
-{
- return (rdt_alloc_capable &&
- (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
-}
-
-static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
-{
- return (rdt_mon_capable &&
- (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
-}
-
/*
* Move tasks from one to the other group. If @from is NULL, then all tasks
* in the systems are moved unconditionally (used for teardown).
@@ -3196,7 +3245,7 @@ int __init rdtgroup_init(void)
* It may also be ok since that would enable debugging of RDT before
* resctrl is mounted.
* The reason why the debugfs directory is created here and not in
- * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and
+ * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
* during the debugfs directory creation also &sb->s_type->i_mutex_key
* (the lockdep class of inode->i_rwsem). Other filesystem
* interactions (eg. SyS_getdents) have the lock ordering:
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 62b137c3c97a..866c9a9bcdee 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -35,12 +35,15 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
{ X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
{ X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
+ { X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
{ X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
+ { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 },
+ { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 9b6fafa69be9..924571fe5864 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,7 @@
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>
+#include <asm/svm.h>
#undef pr_fmt
#define pr_fmt(fmt) "vmware: " fmt
@@ -476,10 +477,49 @@ static bool __init vmware_legacy_x2apic_available(void)
(eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
+ struct pt_regs *regs)
+{
+ /* Copy VMWARE specific Hypercall parameters to the GHCB */
+ ghcb_set_rip(ghcb, regs->ip);
+ ghcb_set_rbx(ghcb, regs->bx);
+ ghcb_set_rcx(ghcb, regs->cx);
+ ghcb_set_rdx(ghcb, regs->dx);
+ ghcb_set_rsi(ghcb, regs->si);
+ ghcb_set_rdi(ghcb, regs->di);
+ ghcb_set_rbp(ghcb, regs->bp);
+}
+
+static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
+{
+ if (!(ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb) &&
+ ghcb_rsi_is_valid(ghcb) &&
+ ghcb_rdi_is_valid(ghcb) &&
+ ghcb_rbp_is_valid(ghcb)))
+ return false;
+
+ regs->bx = ghcb->save.rbx;
+ regs->cx = ghcb->save.rcx;
+ regs->dx = ghcb->save.rdx;
+ regs->si = ghcb->save.rsi;
+ regs->di = ghcb->save.rdi;
+ regs->bp = ghcb->save.rbp;
+
+ return true;
+}
+#endif
+
const __initconst struct hypervisor_x86 x86_hyper_vmware = {
- .name = "VMware",
- .detect = vmware_platform,
- .type = X86_HYPER_VMWARE,
- .init.init_platform = vmware_platform_setup,
- .init.x2apic_available = vmware_legacy_x2apic_available,
+ .name = "VMware",
+ .detect = vmware_platform,
+ .type = X86_HYPER_VMWARE,
+ .init.init_platform = vmware_platform_setup,
+ .init.x2apic_available = vmware_legacy_x2apic_available,
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ .runtime.sev_es_hcall_prepare = vmware_sev_es_hcall_prepare,
+ .runtime.sev_es_hcall_finish = vmware_sev_es_hcall_finish,
+#endif
};
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index a0e8fc7d85f1..ddffd80f5c52 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -229,8 +229,8 @@ static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
it = &of_ioapic_type[type_index];
ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity);
- tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
- tmp.ioapic_pin = fwspec->param[0];
+ tmp.devid = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
+ tmp.ioapic.pin = fwspec->param[0];
return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp);
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 48ce44576947..25c06b67e7e0 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -29,8 +29,8 @@ static int die_counter;
static struct pt_regs exec_summary_regs;
-bool in_task_stack(unsigned long *stack, struct task_struct *task,
- struct stack_info *info)
+bool noinstr in_task_stack(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info)
{
unsigned long *begin = task_stack_page(task);
unsigned long *end = task_stack_page(task) + THREAD_SIZE;
@@ -46,7 +46,8 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
return true;
}
-bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+/* Called from get_stack_info_noinstr - so must be noinstr too */
+bool noinstr in_entry_stack(unsigned long *stack, struct stack_info *info)
{
struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
@@ -115,7 +116,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
unsigned long prologue = regs->ip - PROLOGUE_SIZE;
if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
- printk("%sCode: Bad RIP value.\n", loglvl);
+ printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
+ loglvl, prologue);
} else {
printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
__stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 4a94d38cd141..1dd851397bd9 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,11 +24,13 @@ static const char * const exception_stack_names[] = {
[ ESTACK_NMI ] = "NMI",
[ ESTACK_DB ] = "#DB",
[ ESTACK_MCE ] = "#MC",
+ [ ESTACK_VC ] = "#VC",
+ [ ESTACK_VC2 ] = "#VC2",
};
const char *stack_type_name(enum stack_type type)
{
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
if (type == STACK_TYPE_IRQ)
return "IRQ";
@@ -79,16 +81,18 @@ struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
EPAGERANGE(NMI),
EPAGERANGE(DB),
EPAGERANGE(MCE),
+ EPAGERANGE(VC),
+ EPAGERANGE(VC2),
};
-static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
+static __always_inline bool in_exception_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long begin, end, stk = (unsigned long)stack;
const struct estack_pages *ep;
struct pt_regs *regs;
unsigned int k;
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
/*
@@ -122,7 +126,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
return true;
}
-static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
+static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
@@ -147,32 +151,38 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
return true;
}
-int get_stack_info(unsigned long *stack, struct task_struct *task,
- struct stack_info *info, unsigned long *visit_mask)
+bool noinstr get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info)
{
- if (!stack)
- goto unknown;
-
- task = task ? : current;
-
if (in_task_stack(stack, task, info))
- goto recursion_check;
+ return true;
if (task != current)
- goto unknown;
+ return false;
if (in_exception_stack(stack, info))
- goto recursion_check;
+ return true;
if (in_irq_stack(stack, info))
- goto recursion_check;
+ return true;
if (in_entry_stack(stack, info))
- goto recursion_check;
+ return true;
+
+ return false;
+}
+
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask)
+{
+ task = task ? : current;
- goto unknown;
+ if (!stack)
+ goto unknown;
+
+ if (!get_stack_info_noinstr(stack, task, info))
+ goto unknown;
-recursion_check:
/*
* Make sure we don't iterate through any given stack more than once.
* If it comes up a second time then there's something wrong going on:
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 983cd53ed4c9..22aad412f965 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -305,6 +305,20 @@ static int __init cpcompare(const void *a, const void *b)
return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr);
}
+static bool e820_nomerge(enum e820_type type)
+{
+ /*
+ * These types may indicate distinct platform ranges aligned to
+ * numa node, protection domain, performance domain, or other
+ * boundaries. Do not merge them.
+ */
+ if (type == E820_TYPE_PRAM)
+ return true;
+ if (type == E820_TYPE_SOFT_RESERVED)
+ return true;
+ return false;
+}
+
int __init e820__update_table(struct e820_table *table)
{
struct e820_entry *entries = table->entries;
@@ -380,7 +394,7 @@ int __init e820__update_table(struct e820_table *table)
}
/* Continue building up new map based on this information: */
- if (current_type != last_type || current_type == E820_TYPE_PRAM) {
+ if (current_type != last_type || e820_nomerge(current_type)) {
if (last_type != 0) {
new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr;
/* Move forward only if the new size was non-zero: */
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 61ddc3a5e5c2..701f196d7c68 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -5,7 +5,6 @@
#include <asm/fpu/internal.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
-#include <asm/cmdline.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
@@ -238,51 +237,11 @@ static void __init fpu__init_system_ctx_switch(void)
}
/*
- * We parse fpu parameters early because fpu__init_system() is executed
- * before parse_early_param().
- */
-static void __init fpu__init_parse_early_param(void)
-{
- char arg[32];
- char *argptr = arg;
- int bit;
-
-#ifdef CONFIG_X86_32
- if (cmdline_find_option_bool(boot_command_line, "no387"))
-#ifdef CONFIG_MATH_EMULATION
- setup_clear_cpu_cap(X86_FEATURE_FPU);
-#else
- pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
-#endif
-
- if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
- setup_clear_cpu_cap(X86_FEATURE_FXSR);
-#endif
-
- if (cmdline_find_option_bool(boot_command_line, "noxsave"))
- setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-
- if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-
- if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-
- if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
- sizeof(arg)) &&
- get_option(&argptr, &bit) &&
- bit >= 0 &&
- bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
-}
-
-/*
* Called on the boot CPU once per system bootup, to set up the initial
* FPU state that is later cloned into all processes:
*/
void __init fpu__init_system(struct cpuinfo_x86 *c)
{
- fpu__init_parse_early_param();
fpu__init_system_early_generic(c);
/*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 038e19c0019e..5d8047441a0a 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -37,6 +37,7 @@ static const char *xfeature_names[] =
"AVX-512 ZMM_Hi256" ,
"Processor Trace (unused)" ,
"Protection Keys User registers",
+ "PASID state",
"unknown xstate feature" ,
};
@@ -51,6 +52,7 @@ static short xsave_cpuid_features[] __initdata = {
X86_FEATURE_AVX512F,
X86_FEATURE_INTEL_PT,
X86_FEATURE_PKU,
+ X86_FEATURE_ENQCMD,
};
/*
@@ -318,6 +320,7 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
print_xstate_feature(XFEATURE_MASK_PKRU);
+ print_xstate_feature(XFEATURE_MASK_PASID);
}
/*
@@ -592,6 +595,7 @@ static void check_xstate_against_struct(int nr)
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
+ XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
/*
* Make *SURE* to add any feature numbers in below if
@@ -601,7 +605,7 @@ static void check_xstate_against_struct(int nr)
if ((nr < XFEATURE_YMM) ||
(nr >= XFEATURE_MAX) ||
(nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
- ((nr >= XFEATURE_RSRVD_COMP_10) && (nr <= XFEATURE_LBR))) {
+ ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) {
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
XSTATE_WARN_ON(1);
}
@@ -1398,3 +1402,60 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_IOMMU_SUPPORT
+void update_pasid(void)
+{
+ u64 pasid_state;
+ u32 pasid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+ return;
+
+ if (!current->mm)
+ return;
+
+ pasid = READ_ONCE(current->mm->pasid);
+ /* Set the valid bit in the PASID MSR/state only for valid pasid. */
+ pasid_state = pasid == PASID_DISABLED ?
+ pasid : pasid | MSR_IA32_PASID_VALID;
+
+ /*
+ * No need to hold fregs_lock() since the task's fpstate won't
+ * be changed by others (e.g. ptrace) while the task is being
+ * switched to or is in IPI.
+ */
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+ /* The MSR is active and can be directly updated. */
+ wrmsrl(MSR_IA32_PASID, pasid_state);
+ } else {
+ struct fpu *fpu = &current->thread.fpu;
+ struct ia32_pasid_state *ppasid_state;
+ struct xregs_state *xsave;
+
+ /*
+ * The CPU's xstate registers are not currently active. Just
+ * update the PASID state in the memory buffer here. The
+ * PASID MSR will be loaded when returning to user mode.
+ */
+ xsave = &fpu->state.xsave;
+ xsave->header.xfeatures |= XFEATURE_MASK_PASID;
+ ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
+ /*
+ * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
+ * won't be NULL and no need to check its value.
+ *
+ * Only update the task's PASID state when it's different
+ * from the mm's pasid.
+ */
+ if (ppasid_state->pasid != pasid_state) {
+ /*
+ * Invalid fpregs so that state restoring will pick up
+ * the PASID state.
+ */
+ __fpu_invalidate_fpregs_state(fpu);
+ ppasid_state->pasid = pasid_state;
+ }
+ }
+}
+#endif /* CONFIG_IOMMU_SUPPORT */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index cbb71c1b574f..05e117137b45 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -36,6 +36,11 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
#include <asm/fixmap.h>
+#include <asm/realmode.h>
+#include <asm/desc.h>
+#include <asm/extable.h>
+#include <asm/trapnr.h>
+#include <asm/sev-es.h>
/*
* Manage page tables very early on.
@@ -61,7 +66,25 @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif
-#define __head __section(.head.text)
+/*
+ * GDT used on the boot CPU before switching to virtual addresses.
+ */
+static struct desc_struct startup_gdt[GDT_ENTRIES] = {
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+};
+
+/*
+ * Address needs to be set at runtime because it references the startup_gdt
+ * while the kernel still uses a direct mapping.
+ */
+static struct desc_ptr startup_gdt_descr = {
+ .size = sizeof(startup_gdt),
+ .address = 0,
+};
+
+#define __head __section(".head.text")
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
@@ -297,7 +320,7 @@ static void __init reset_early_page_tables(void)
}
/* Create a new PMD entry */
-int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
+bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
{
unsigned long physaddr = address - __PAGE_OFFSET;
pgdval_t pgd, *pgd_p;
@@ -307,7 +330,7 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
/* Invalid address or early pgt is done ? */
if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
- return -1;
+ return false;
again:
pgd_p = &early_top_pgt[pgd_index(address)].pgd;
@@ -364,10 +387,10 @@ again:
}
pmd_p[pmd_index(address)] = pmd;
- return 0;
+ return true;
}
-int __init early_make_pgtable(unsigned long address)
+static bool __init early_make_pgtable(unsigned long address)
{
unsigned long physaddr = address - __PAGE_OFFSET;
pmdval_t pmd;
@@ -377,6 +400,19 @@ int __init early_make_pgtable(unsigned long address)
return __early_make_pgtable(address, pmd);
}
+void __init do_early_exception(struct pt_regs *regs, int trapnr)
+{
+ if (trapnr == X86_TRAP_PF &&
+ early_make_pgtable(native_read_cr2()))
+ return;
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) &&
+ trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
+ return;
+
+ early_fixup_exception(regs, trapnr);
+}
+
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
static void __init clear_bss(void)
@@ -489,3 +525,81 @@ void __init x86_64_start_reservations(char *real_mode_data)
start_kernel();
}
+
+/*
+ * Data structures and code used for IDT setup in head_64.S. The bringup-IDT is
+ * used until the idt_table takes over. On the boot CPU this happens in
+ * x86_64_start_kernel(), on secondary CPUs in start_secondary(). In both cases
+ * this happens in the functions called from head_64.S.
+ *
+ * The idt_table can't be used that early because all the code modifying it is
+ * in idt.c and can be instrumented by tracing or KASAN, which both don't work
+ * during early CPU bringup. Also the idt_table has the runtime vectors
+ * configured which require certain CPU state to be setup already (like TSS),
+ * which also hasn't happened yet in early CPU bringup.
+ */
+static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
+
+static struct desc_ptr bringup_idt_descr = {
+ .size = (NUM_EXCEPTION_VECTORS * sizeof(gate_desc)) - 1,
+ .address = 0, /* Set at runtime */
+};
+
+static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler)
+{
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ struct idt_data data;
+ gate_desc desc;
+
+ init_idt_data(&data, n, handler);
+ idt_init_desc(&desc, &data);
+ native_write_idt_entry(idt, n, &desc);
+#endif
+}
+
+/* This runs while still in the direct mapping */
+static void startup_64_load_idt(unsigned long physbase)
+{
+ struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase);
+ gate_desc *idt = fixup_pointer(bringup_idt_table, physbase);
+
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+ void *handler;
+
+ /* VMM Communication Exception */
+ handler = fixup_pointer(vc_no_ghcb, physbase);
+ set_bringup_idt_handler(idt, X86_TRAP_VC, handler);
+ }
+
+ desc->address = (unsigned long)idt;
+ native_load_idt(desc);
+}
+
+/* This is used when running on kernel addresses */
+void early_setup_idt(void)
+{
+ /* VMM Communication Exception */
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+ set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
+
+ bringup_idt_descr.address = (unsigned long)bringup_idt_table;
+ native_load_idt(&bringup_idt_descr);
+}
+
+/*
+ * Setup boot CPU state needed before kernel switches to virtual addresses.
+ */
+void __head startup_64_setup_env(unsigned long physbase)
+{
+ /* Load GDT */
+ startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase);
+ native_load_gdt(&startup_gdt_descr);
+
+ /* New GDT is live - reload data segment registers */
+ asm volatile("movl %%eax, %%ds\n"
+ "movl %%eax, %%ss\n"
+ "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
+
+ startup_64_load_idt(physbase);
+}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 16da4ac01597..7eb2a1c87969 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -73,6 +73,20 @@ SYM_CODE_START_NOALIGN(startup_64)
/* Set up the stack for verify_cpu(), similar to initial_stack below */
leaq (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
+ leaq _text(%rip), %rdi
+ pushq %rsi
+ call startup_64_setup_env
+ popq %rsi
+
+ /* Now switch to __KERNEL_CS so IRET works reliably */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lon_kernel_cs:
+ UNWIND_HINT_EMPTY
+
/* Sanitize CPU configuration */
call verify_cpu
@@ -112,6 +126,18 @@ SYM_CODE_START(secondary_startup_64)
call verify_cpu
/*
+ * The secondary_startup_64_no_verify entry point is only used by
+ * SEV-ES guests. In those guests the call to verify_cpu() would cause
+ * #VC exceptions which can not be handled at this stage of secondary
+ * CPU bringup.
+ *
+ * All non SEV-ES systems, especially Intel systems, need to execute
+ * verify_cpu() above to make sure NX is enabled.
+ */
+SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
+ UNWIND_HINT_EMPTY
+
+ /*
* Retrieve the modifier (SME encryption mask if SME is active) to be
* added to the initial pgdir entry that will be programmed into CR3.
*/
@@ -144,33 +170,6 @@ SYM_CODE_START(secondary_startup_64)
1:
UNWIND_HINT_EMPTY
- /* Check if nx is implemented */
- movl $0x80000001, %eax
- cpuid
- movl %edx,%edi
-
- /* Setup EFER (Extended Feature Enable Register) */
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_SCE, %eax /* Enable System Call */
- btl $20,%edi /* No Execute supported? */
- jnc 1f
- btsl $_EFER_NX, %eax
- btsq $_PAGE_BIT_NX,early_pmd_flags(%rip)
-1: wrmsr /* Make changes effective */
-
- /* Setup cr0 */
- movl $CR0_STATE, %eax
- /* Make changes effective */
- movq %rax, %cr0
-
- /* Setup a boot time stack */
- movq initial_stack(%rip), %rsp
-
- /* zero EFLAGS after setting rsp */
- pushq $0
- popfq
-
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
@@ -205,6 +204,41 @@ SYM_CODE_START(secondary_startup_64)
movl initial_gs+4(%rip),%edx
wrmsr
+ /*
+ * Setup a boot time stack - Any secondary CPU will have lost its stack
+ * by now because the cr3-switch above unmaps the real-mode stack
+ */
+ movq initial_stack(%rip), %rsp
+
+ /* Setup and Load IDT */
+ pushq %rsi
+ call early_setup_idt
+ popq %rsi
+
+ /* Check if nx is implemented */
+ movl $0x80000001, %eax
+ cpuid
+ movl %edx,%edi
+
+ /* Setup EFER (Extended Feature Enable Register) */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_SCE, %eax /* Enable System Call */
+ btl $20,%edi /* No Execute supported? */
+ jnc 1f
+ btsl $_EFER_NX, %eax
+ btsq $_PAGE_BIT_NX,early_pmd_flags(%rip)
+1: wrmsr /* Make changes effective */
+
+ /* Setup cr0 */
+ movl $CR0_STATE, %eax
+ /* Make changes effective */
+ movq %rax, %cr0
+
+ /* zero EFLAGS after setting rsp */
+ pushq $0
+ popfq
+
/* rsi is pointer to real mode structure with interesting info.
pass it to C */
movq %rsi, %rdi
@@ -259,11 +293,47 @@ SYM_CODE_START(start_cpu0)
SYM_CODE_END(start_cpu0)
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/*
+ * VC Exception handler used during early boot when running on kernel
+ * addresses, but before the switch to the idt_table can be made.
+ * The early_idt_handler_array can't be used here because it calls into a lot
+ * of __init code and this handler is also used during CPU offlining/onlining.
+ * Therefore this handler ends up in the .text section so that it stays around
+ * when .init.text is freed.
+ */
+SYM_CODE_START_NOALIGN(vc_boot_ghcb)
+ UNWIND_HINT_IRET_REGS offset=8
+
+ /* Build pt_regs */
+ PUSH_AND_CLEAR_REGS
+
+ /* Call C handler */
+ movq %rsp, %rdi
+ movq ORIG_RAX(%rsp), %rsi
+ movq initial_vc_handler(%rip), %rax
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rax
+
+ /* Unwind pt_regs */
+ POP_REGS
+
+ /* Remove Error Code */
+ addq $8, %rsp
+
+ /* Pure iret required here - don't use INTERRUPT_RETURN */
+ iretq
+SYM_CODE_END(vc_boot_ghcb)
+#endif
+
/* Both SMP bootup and ACPI suspend change these variables */
__REFDATA
.balign 8
SYM_DATA(initial_code, .quad x86_64_start_kernel)
SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
+#endif
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder
@@ -319,22 +389,43 @@ SYM_CODE_START_LOCAL(early_idt_handler_common)
pushq %r15 /* pt_regs->r15 */
UNWIND_HINT_REGS
- cmpq $14,%rsi /* Page fault? */
- jnz 10f
- GET_CR2_INTO(%rdi) /* can clobber %rax if pv */
- call early_make_pgtable
- andl %eax,%eax
- jz 20f /* All good */
-
-10:
movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */
- call early_fixup_exception
+ call do_early_exception
-20:
decl early_recursion_flag(%rip)
jmp restore_regs_and_return_to_kernel
SYM_CODE_END(early_idt_handler_common)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/*
+ * VC Exception handler used during very early boot. The
+ * early_idt_handler_array can't be used because it returns via the
+ * paravirtualized INTERRUPT_RETURN and pv-ops don't work that early.
+ *
+ * This handler will end up in the .init.text section and not be
+ * available to boot secondary CPUs.
+ */
+SYM_CODE_START_NOALIGN(vc_no_ghcb)
+ UNWIND_HINT_IRET_REGS offset=8
+
+ /* Build pt_regs */
+ PUSH_AND_CLEAR_REGS
+
+ /* Call C handler */
+ movq %rsp, %rdi
+ movq ORIG_RAX(%rsp), %rsi
+ call do_vc_no_ghcb
+
+ /* Unwind pt_regs */
+ POP_REGS
+
+ /* Remove Error Code */
+ addq $8, %rsp
+
+ /* Pure iret required here - don't use INTERRUPT_RETURN */
+ iretq
+SYM_CODE_END(vc_no_ghcb)
+#endif
#define SYM_DATA_START_PAGE_ALIGNED(name) \
SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index b98ff620ba77..03aa33b58165 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -442,42 +442,6 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
}
/*
- * Dump the debug register contents to the user.
- * We can't dump our per cpu values because it
- * may contain cpu wide breakpoint, something that
- * doesn't belong to the current task.
- *
- * TODO: include non-ptrace user breakpoints (perf)
- */
-void aout_dump_debugregs(struct user *dump)
-{
- int i;
- int dr7 = 0;
- struct perf_event *bp;
- struct arch_hw_breakpoint *info;
- struct thread_struct *thread = &current->thread;
-
- for (i = 0; i < HBP_NUM; i++) {
- bp = thread->ptrace_bps[i];
-
- if (bp && !bp->attr.disabled) {
- dump->u_debugreg[i] = bp->attr.bp_addr;
- info = counter_arch_bp(bp);
- dr7 |= encode_dr7(i, info->len, info->type);
- } else {
- dump->u_debugreg[i] = 0;
- }
- }
-
- dump->u_debugreg[4] = 0;
- dump->u_debugreg[5] = 0;
- dump->u_debugreg[6] = current->thread.debugreg6;
-
- dump->u_debugreg[7] = dr7;
-}
-EXPORT_SYMBOL_GPL(aout_dump_debugregs);
-
-/*
* Release the user breakpoints used by ptrace
*/
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
@@ -490,7 +454,7 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
t->ptrace_bps[i] = NULL;
}
- t->debugreg6 = 0;
+ t->virtual_dr6 = 0;
t->ptrace_dr7 = 0;
}
@@ -500,7 +464,7 @@ void hw_breakpoint_restore(void)
set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
- set_debugreg(current->thread.debugreg6, 6);
+ set_debugreg(DR6_RESERVED, 6);
set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
@@ -523,10 +487,10 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
*/
static int hw_breakpoint_handler(struct die_args *args)
{
- int i, cpu, rc = NOTIFY_STOP;
+ int i, rc = NOTIFY_STOP;
struct perf_event *bp;
- unsigned long dr6;
unsigned long *dr6_p;
+ unsigned long dr6;
/* The DR6 value is pointed by args->err */
dr6_p = (unsigned long *)ERR_PTR(args->err);
@@ -540,14 +504,6 @@ static int hw_breakpoint_handler(struct die_args *args)
if ((dr6 & DR_TRAP_BITS) == 0)
return NOTIFY_DONE;
- /*
- * Assert that local interrupts are disabled
- * Reset the DRn bits in the virtualized register value.
- * The ptrace trigger routine will add in whatever is needed.
- */
- current->thread.debugreg6 &= ~DR_TRAP_BITS;
- cpu = get_cpu();
-
/* Handle all the breakpoints that were triggered */
for (i = 0; i < HBP_NUM; ++i) {
if (likely(!(dr6 & (DR_TRAP0 << i))))
@@ -561,7 +517,7 @@ static int hw_breakpoint_handler(struct die_args *args)
*/
rcu_read_lock();
- bp = per_cpu(bp_per_reg[i], cpu);
+ bp = this_cpu_read(bp_per_reg[i]);
/*
* Reset the 'i'th TRAP bit in dr6 to denote completion of
* exception handling
@@ -592,12 +548,10 @@ static int hw_breakpoint_handler(struct die_args *args)
* breakpoints (to generate signals) and b) when the system has
* taken exception due to multiple causes
*/
- if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
+ if ((current->thread.virtual_dr6 & DR_TRAP_BITS) ||
(dr6 & (~DR_TRAP_BITS)))
rc = NOTIFY_DONE;
- put_cpu();
-
return rc;
}
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 7ecf9babf0cb..ee1a283f8e96 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -11,13 +11,6 @@
#include <asm/desc.h>
#include <asm/hw_irq.h>
-struct idt_data {
- unsigned int vector;
- unsigned int segment;
- struct idt_bits bits;
- const void *addr;
-};
-
#define DPL0 0x0
#define DPL3 0x3
@@ -149,9 +142,6 @@ static const __initconst struct idt_data apic_idts[] = {
# ifdef CONFIG_IRQ_WORK
INTG(IRQ_WORK_VECTOR, asm_sysvec_irq_work),
# endif
-# ifdef CONFIG_X86_UV
- INTG(UV_BAU_MESSAGE, asm_sysvec_uv_bau_message),
-# endif
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
#endif
@@ -178,20 +168,6 @@ bool idt_is_f00f_address(unsigned long address)
}
#endif
-static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
-{
- unsigned long addr = (unsigned long) d->addr;
-
- gate->offset_low = (u16) addr;
- gate->segment = (u16) d->segment;
- gate->bits = d->bits;
- gate->offset_middle = (u16) (addr >> 16);
-#ifdef CONFIG_X86_64
- gate->offset_high = (u32) (addr >> 32);
- gate->reserved = 0;
-#endif
-}
-
static __init void
idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys)
{
@@ -209,14 +185,7 @@ static __init void set_intr_gate(unsigned int n, const void *addr)
{
struct idt_data data;
- BUG_ON(n > 0xFF);
-
- memset(&data, 0, sizeof(data));
- data.vector = n;
- data.addr = addr;
- data.segment = __KERNEL_CS;
- data.bits.type = GATE_INTERRUPT;
- data.bits.p = 1;
+ init_idt_data(&data, n, addr);
idt_setup_from_table(idt_table, &data, 1, false);
}
@@ -257,11 +226,14 @@ static const __initconst struct idt_data early_pf_idts[] = {
* cpu_init() when the TSS has been initialized.
*/
static const __initconst struct idt_data ist_idts[] = {
- ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB),
- ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI),
- ISTG(X86_TRAP_DF, asm_exc_double_fault, IST_INDEX_DF),
+ ISTG(X86_TRAP_DB, asm_exc_debug, IST_INDEX_DB),
+ ISTG(X86_TRAP_NMI, asm_exc_nmi, IST_INDEX_NMI),
+ ISTG(X86_TRAP_DF, asm_exc_double_fault, IST_INDEX_DF),
#ifdef CONFIG_X86_MCE
- ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE),
+ ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE),
+#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC),
#endif
};
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 181060247e3c..c5dd50369e2f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -227,7 +227,7 @@ static __always_inline void handle_irq(struct irq_desc *desc,
struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_X86_64))
- run_on_irqstack_cond(desc->handle_irq, desc, regs);
+ run_irq_on_irqstack_cond(desc->handle_irq, desc, regs);
else
__handle_irq(desc, regs);
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1b4fe93a86c5..440eed558558 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -74,5 +74,5 @@ int irq_init_percpu_irqstack(unsigned int cpu)
void do_softirq_own_stack(void)
{
- run_on_irqstack_cond(__do_softirq, NULL, NULL);
+ run_on_irqstack_cond(__do_softirq, NULL);
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 57c2ecf43134..ce831f9448e7 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -200,8 +200,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
/* Copying screen_info will do? */
- memcpy(&params->screen_info, &boot_params.screen_info,
- sizeof(struct screen_info));
+ memcpy(&params->screen_info, &screen_info, sizeof(struct screen_info));
/* Fill in memsize later */
params->screen_info.ext_mem_k = 0;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index c2f02f308ecf..ff7878df96b4 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -629,9 +629,10 @@ static void kgdb_hw_overflow_handler(struct perf_event *event,
struct task_struct *tsk = current;
int i;
- for (i = 0; i < 4; i++)
+ for (i = 0; i < 4; i++) {
if (breakinfo[i].enabled)
- tsk->thread.debugreg6 |= (DR_TRAP0 << i);
+ tsk->thread.virtual_dr6 |= (DR_TRAP0 << i);
+ }
}
void kgdb_arch_late(void)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index fdadc37d72af..547c7abb39f5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -38,9 +38,9 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
-#include <linux/frame.h>
#include <linux/kasan.h>
#include <linux/moduleloader.h>
+#include <linux/objtool.h>
#include <linux/vmalloc.h>
#include <linux/pgtable.h>
@@ -767,124 +767,21 @@ asm(
NOKPROBE_SYMBOL(kretprobe_trampoline);
STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+
/*
* Called from kretprobe_trampoline
*/
__used __visible void *trampoline_handler(struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
- kprobe_opcode_t *correct_ret_addr = NULL;
- void *frame_pointer;
- bool skipped = false;
-
- /*
- * Set a dummy kprobe for avoiding kretprobe recursion.
- * Since kretprobe never run in kprobe handler, kprobe must not
- * be running at this point.
- */
- kprobe_busy_begin();
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
- regs->cs |= get_kernel_rpl();
regs->gs = 0;
#endif
- /* We use pt_regs->sp for return address holder. */
- frame_pointer = &regs->sp;
- regs->ip = trampoline_address;
+ regs->ip = (unsigned long)&kretprobe_trampoline;
regs->orig_ax = ~0UL;
- /*
- * It is possible to have multiple instances associated with a given
- * task either because multiple functions in the call path have
- * return probes installed on them, and/or more than one
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always pushed into the head of the list
- * - when multiple return probes are registered for the same
- * function, the (chronologically) first instance's ret_addr
- * will be the real return address, and all the rest will
- * point to kretprobe_trampoline.
- */
- hlist_for_each_entry(ri, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
- /*
- * Return probes must be pushed on this hash list correct
- * order (same as return order) so that it can be popped
- * correctly. However, if we find it is pushed it incorrect
- * order, this means we find a function which should not be
- * probed, because the wrong order entry is pushed on the
- * path of processing other kretprobe itself.
- */
- if (ri->fp != frame_pointer) {
- if (!skipped)
- pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
- skipped = true;
- continue;
- }
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- if (skipped)
- pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
- ri->rp->kp.addr);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
- correct_ret_addr = ri->ret_addr;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
- if (ri->fp != frame_pointer)
- continue;
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- if (ri->rp && ri->rp->handler) {
- __this_cpu_write(current_kprobe, &ri->rp->kp);
- ri->ret_addr = correct_ret_addr;
- ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, &kprobe_busy);
- }
-
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_hash_unlock(current, &flags);
-
- kprobe_busy_end();
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- return (void *)orig_ret_address;
+ return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, &regs->sp);
}
NOKPROBE_SYMBOL(trampoline_handler);
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 40f380461e6d..041f0b50bc27 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -16,8 +16,9 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
#include <linux/pgtable.h>
+#include <linux/static_call.h>
#include <asm/text-patching.h>
#include <asm/cacheflush.h>
@@ -181,7 +182,6 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
/* Save skipped registers */
regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
- regs->cs |= get_kernel_rpl();
regs->gs = 0;
#endif
regs->ip = (unsigned long)op->kp.addr + INT3_INSN_SIZE;
@@ -210,7 +210,8 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
/* Check whether the address range is reserved */
if (ftrace_text_reserved(src, src + len - 1) ||
alternatives_text_reserved(src, src + len - 1) ||
- jump_label_text_reserved(src, src + len - 1))
+ jump_label_text_reserved(src, src + len - 1) ||
+ static_call_text_reserved(src, src + len - 1))
return -EBUSY;
return len;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1b51b727b140..7f57ede3cb8e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -36,6 +36,8 @@
#include <asm/hypervisor.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
+#include <asm/ptrace.h>
+#include <asm/svm.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -652,6 +654,7 @@ static void __init kvm_guest_init(void)
}
if (pv_tlb_flush_supported()) {
+ pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
pr_info("KVM setup pv remote TLB flush\n");
}
@@ -743,13 +746,34 @@ static void __init kvm_init_platform(void)
x86_platform.apic_post_init = kvm_apic_init;
}
+#if defined(CONFIG_AMD_MEM_ENCRYPT)
+static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
+{
+ /* RAX and CPL are already in the GHCB */
+ ghcb_set_rbx(ghcb, regs->bx);
+ ghcb_set_rcx(ghcb, regs->cx);
+ ghcb_set_rdx(ghcb, regs->dx);
+ ghcb_set_rsi(ghcb, regs->si);
+}
+
+static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
+{
+ /* No checking of the return state needed */
+ return true;
+}
+#endif
+
const __initconst struct hypervisor_x86 x86_hyper_kvm = {
- .name = "KVM",
- .detect = kvm_detect,
- .type = X86_HYPER_KVM,
- .init.guest_late_init = kvm_guest_init,
- .init.x2apic_available = kvm_para_available,
- .init.init_platform = kvm_init_platform,
+ .name = "KVM",
+ .detect = kvm_detect,
+ .type = X86_HYPER_KVM,
+ .init.guest_late_init = kvm_guest_init,
+ .init.x2apic_available = kvm_para_available,
+ .init.init_platform = kvm_init_platform,
+#if defined(CONFIG_AMD_MEM_ENCRYPT)
+ .runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare,
+ .runtime.sev_es_hcall_finish = kvm_sev_es_hcall_finish,
+#endif
};
static __init int activate_jump_labels(void)
@@ -764,14 +788,6 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);
-static void kvm_free_pv_cpu_mask(void)
-{
- unsigned int cpu;
-
- for_each_possible_cpu(cpu)
- free_cpumask_var(per_cpu(__pv_cpu_mask, cpu));
-}
-
static __init int kvm_alloc_cpumask(void)
{
int cpu;
@@ -790,20 +806,11 @@ static __init int kvm_alloc_cpumask(void)
if (alloc)
for_each_possible_cpu(cpu) {
- if (!zalloc_cpumask_var_node(
- per_cpu_ptr(&__pv_cpu_mask, cpu),
- GFP_KERNEL, cpu_to_node(cpu))) {
- goto zalloc_cpumask_fail;
- }
+ zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
}
- apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
- pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
return 0;
-
-zalloc_cpumask_fail:
- kvm_free_pv_cpu_mask();
- return -ENOMEM;
}
arch_initcall(kvm_alloc_cpumask);
@@ -968,7 +975,7 @@ void arch_haltpoll_disable(unsigned int cpu)
if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
return;
- /* Enable guest halt poll disables host halt poll */
+ /* Disable guest halt poll enables host halt poll */
smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index baa21090c9be..8f06449aab27 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -24,7 +24,6 @@
#include <asm/irqdomain.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
-#include <asm/io_apic.h>
#include <asm/proto.h>
#include <asm/bios_ebda.h>
#include <asm/e820/api.h>
@@ -46,11 +45,6 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
-int __init default_mpc_apic_id(struct mpc_cpu *m)
-{
- return m->apicid;
-}
-
static void __init MP_processor_info(struct mpc_cpu *m)
{
int apicid;
@@ -61,7 +55,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
return;
}
- apicid = x86_init.mpparse.mpc_apic_id(m);
+ apicid = m->apicid;
if (m->cpuflag & CPU_BOOTPROCESSOR) {
bootup_cpu = " (Bootup-CPU)";
@@ -73,7 +67,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
}
#ifdef CONFIG_X86_IO_APIC
-void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str)
+static void __init mpc_oem_bus_info(struct mpc_bus *m, char *str)
{
memcpy(str, m->bustype, 6);
str[6] = 0;
@@ -84,7 +78,7 @@ static void __init MP_bus_info(struct mpc_bus *m)
{
char str[7];
- x86_init.mpparse.mpc_oem_bus_info(m, str);
+ mpc_oem_bus_info(m, str);
#if MAX_MP_BUSSES < 256
if (m->busid >= MAX_MP_BUSSES) {
@@ -100,9 +94,6 @@ static void __init MP_bus_info(struct mpc_bus *m)
mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
#endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
- if (x86_init.mpparse.mpc_oem_pci_bus)
- x86_init.mpparse.mpc_oem_pci_bus(m);
-
clear_bit(m->busid, mp_bus_not_pci);
#ifdef CONFIG_EISA
mp_bus_id_to_type[m->busid] = MP_BUS_PCI;
@@ -198,8 +189,6 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
1, mpc, mpc->length, 1);
}
-void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
-
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@@ -218,14 +207,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
if (early)
return 1;
- if (mpc->oemptr)
- x86_init.mpparse.smp_read_mpc_oem(mpc);
-
- /*
- * Now process the configuration blocks.
- */
- x86_init.mpparse.mpc_record(0);
-
+ /* Now process the configuration blocks. */
while (count < mpc->length) {
switch (*mpt) {
case MP_PROCESSOR:
@@ -256,7 +238,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
count = mpc->length;
break;
}
- x86_init.mpparse.mpc_record(1);
}
if (!num_processors)
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 49dcfb85e773..c0d409810658 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -80,18 +80,30 @@ static ssize_t msr_read(struct file *file, char __user *buf,
static int filter_write(u32 reg)
{
+ /*
+ * MSRs writes usually happen all at once, and can easily saturate kmsg.
+ * Only allow one message every 30 seconds.
+ *
+ * It's possible to be smarter here and do it (for example) per-MSR, but
+ * it would certainly be more complex, and this is enough at least to
+ * avoid saturating the ring buffer.
+ */
+ static DEFINE_RATELIMIT_STATE(fw_rs, 30 * HZ, 1);
+
switch (allow_writes) {
case MSR_WRITES_ON: return 0;
case MSR_WRITES_OFF: return -EPERM;
default: break;
}
+ if (!__ratelimit(&fw_rs))
+ return 0;
+
if (reg == MSR_IA32_ENERGY_PERF_BIAS)
return 0;
- pr_err_ratelimited("Write to unrecognized MSR 0x%x by %s\n"
- "Please report to x86@kernel.org\n",
- reg, current->comm);
+ pr_err("Write to unrecognized MSR 0x%x by %s (pid: %d). Please report to x86@kernel.org.\n",
+ reg, current->comm, current->pid);
return 0;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4fc9954a9560..4bc77aaf1303 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -33,6 +33,7 @@
#include <asm/reboot.h>
#include <asm/cache.h>
#include <asm/nospec-branch.h>
+#include <asm/sev-es.h>
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
@@ -102,7 +103,6 @@ fs_initcall(nmi_warning_debugfs);
static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
- u64 whole_msecs = READ_ONCE(action->max_duration);
int remainder_ns, decimal_msecs;
if (duration < nmi_longest_ns || duration < action->max_duration)
@@ -110,12 +110,12 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
action->max_duration = duration;
- remainder_ns = do_div(whole_msecs, (1000 * 1000));
+ remainder_ns = do_div(duration, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
printk_ratelimited(KERN_INFO
"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
- action->handler, whole_msecs, decimal_msecs);
+ action->handler, duration, decimal_msecs);
}
static int nmi_handle(unsigned int type, struct pt_regs *regs)
@@ -477,6 +477,12 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
{
bool irq_state;
+ /*
+ * Re-enable NMIs right here when running as an SEV-ES guest. This might
+ * cause nested NMIs, but those can be handled safely.
+ */
+ sev_es_nmi_complete();
+
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
@@ -488,6 +494,12 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
this_cpu_write(nmi_cr2, read_cr2());
nmi_restart:
+ /*
+ * Needs to happen before DR7 is accessed, because the hypervisor can
+ * intercept DR7 reads/writes, turning those into #VC exceptions.
+ */
+ sev_es_ist_enter(regs);
+
this_cpu_write(nmi_dr7, local_db_save());
irq_state = idtentry_enter_nmi(regs);
@@ -501,6 +513,8 @@ nmi_restart:
local_db_restore(this_cpu_read(nmi_dr7));
+ sev_es_ist_exit();
+
if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
write_cr2(this_cpu_read(nmi_cr2));
if (this_cpu_dec_return(nmi_state))
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index de2138ba38e5..6c3407ba6ee9 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -263,13 +263,8 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
struct pv_info pv_info = {
.name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
- .kernel_rpl = 0,
- .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
-
-#ifdef CONFIG_X86_64
.extra_user_64bit_cs = __USER_CS,
#endif
-#endif
};
/* 64-bit pagetable entries */
@@ -305,9 +300,7 @@ struct paravirt_patch_template pv_ops = {
.cpu.load_idt = native_load_idt,
.cpu.store_tr = native_store_tr,
.cpu.load_tls = native_load_tls,
-#ifdef CONFIG_X86_64
.cpu.load_gs_index = native_load_gs_index,
-#endif
.cpu.write_ldt_entry = native_write_ldt_entry,
.cpu.write_gdt_entry = native_write_gdt_entry,
.cpu.write_idt_entry = native_write_idt_entry,
@@ -317,9 +310,7 @@ struct paravirt_patch_template pv_ops = {
.cpu.load_sp0 = native_load_sp0,
-#ifdef CONFIG_X86_64
.cpu.usergs_sysret64 = native_usergs_sysret64,
-#endif
.cpu.iret = native_iret,
.cpu.swapgs = native_swapgs,
@@ -369,24 +360,16 @@ struct paravirt_patch_template pv_ops = {
.mmu.release_p4d = paravirt_nop,
.mmu.set_pte = native_set_pte,
- .mmu.set_pte_at = native_set_pte_at,
.mmu.set_pmd = native_set_pmd,
.mmu.ptep_modify_prot_start = __ptep_modify_prot_start,
.mmu.ptep_modify_prot_commit = __ptep_modify_prot_commit,
-#if CONFIG_PGTABLE_LEVELS >= 3
-#ifdef CONFIG_X86_PAE
- .mmu.set_pte_atomic = native_set_pte_atomic,
- .mmu.pte_clear = native_pte_clear,
- .mmu.pmd_clear = native_pmd_clear,
-#endif
.mmu.set_pud = native_set_pud,
.mmu.pmd_val = PTE_IDENT,
.mmu.make_pmd = PTE_IDENT,
-#if CONFIG_PGTABLE_LEVELS >= 4
.mmu.pud_val = PTE_IDENT,
.mmu.make_pud = PTE_IDENT,
@@ -398,8 +381,6 @@ struct paravirt_patch_template pv_ops = {
.mmu.set_pgd = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
-#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
-#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
.mmu.pte_val = PTE_IDENT,
.mmu.pgd_val = PTE_IDENT,
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index 3eff63c090d2..ace6e334cb39 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -26,14 +26,10 @@ struct patch_xxl {
const unsigned char mmu_read_cr3[3];
const unsigned char mmu_write_cr3[3];
const unsigned char irq_restore_fl[2];
-# ifdef CONFIG_X86_64
const unsigned char cpu_wbinvd[2];
const unsigned char cpu_usergs_sysret64[6];
const unsigned char cpu_swapgs[3];
const unsigned char mov64[3];
-# else
- const unsigned char cpu_iret[1];
-# endif
};
static const struct patch_xxl patch_data_xxl = {
@@ -42,7 +38,6 @@ static const struct patch_xxl patch_data_xxl = {
.irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax
.mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax
.mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax
-# ifdef CONFIG_X86_64
.mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3
.irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq
.cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd
@@ -50,19 +45,11 @@ static const struct patch_xxl patch_data_xxl = {
0x48, 0x0f, 0x07 }, // swapgs; sysretq
.cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs
.mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax
-# else
- .mmu_write_cr3 = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3
- .irq_restore_fl = { 0x50, 0x9d }, // push %eax; popf
- .cpu_iret = { 0xcf }, // iret
-# endif
};
unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len)
{
-#ifdef CONFIG_X86_64
return PATCH(xxl, mov64, insn_buff, len);
-#endif
- return 0;
}
# endif /* CONFIG_PARAVIRT_XXL */
@@ -98,13 +85,9 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr,
PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len);
PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len);
-# ifdef CONFIG_X86_64
PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len);
PATCH_CASE(cpu, swapgs, xxl, insn_buff, len);
PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len);
-# else
- PATCH_CASE(cpu, iret, xxl, insn_buff, len);
-# endif
#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 5dcedad21dff..de234e7a8962 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
-#include <linux/dma-debug.h>
#include <linux/iommu.h>
#include <linux/dmar.h>
#include <linux/export.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 13ce616cc7af..ba4593a913fa 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -42,6 +42,7 @@
#include <asm/spec-ctrl.h>
#include <asm/io_bitmap.h>
#include <asm/proto.h>
+#include <asm/frame.h>
#include "process.h"
@@ -133,7 +134,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
- frame->bp = 0;
+ frame->bp = encode_frame_pointer(childregs);
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9afefe325acb..df342bedea88 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -407,7 +407,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
{
unsigned long gsbase;
- if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
unsigned long flags;
local_irq_save(flags);
@@ -422,7 +422,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
{
- if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+ if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
unsigned long flags;
local_irq_save(flags);
@@ -439,7 +439,7 @@ unsigned long x86_fsbase_read_task(struct task_struct *task)
if (task == current)
fsbase = x86_fsbase_read_cpu();
- else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
+ else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
(task->thread.fsindex == 0))
fsbase = task->thread.fsbase;
else
@@ -454,7 +454,7 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
if (task == current)
gsbase = x86_gsbase_read_cpu_inactive();
- else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
+ else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
(task->thread.gsindex == 0))
gsbase = task->thread.gsbase;
else
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e7537c5440bb..bedca011459c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -465,7 +465,7 @@ static void ptrace_triggered(struct perf_event *bp,
break;
}
- thread->debugreg6 |= (DR_TRAP0 << i);
+ thread->virtual_dr6 |= (DR_TRAP0 << i);
}
/*
@@ -601,7 +601,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
if (bp)
val = bp->hw.info.address;
} else if (n == 6) {
- val = thread->debugreg6;
+ val = thread->virtual_dr6 ^ DR6_RESERVED; /* Flip back to arch polarity */
} else if (n == 7) {
val = thread->ptrace_dr7;
}
@@ -657,7 +657,7 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n,
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
} else if (n == 6) {
- thread->debugreg6 = val;
+ thread->virtual_dr6 = val ^ DR6_RESERVED; /* Flip to positive polarity */
rc = 0;
} else if (n == 7) {
rc = ptrace_write_dr7(tsk, val);
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1b10717c9321..6d0df6a58873 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,6 +8,7 @@
#include <asm/hpet.h>
#include <asm/setup.h>
+#include <asm/mce.h>
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-#include <linux/jump_label.h>
-#include <asm/string_64.h>
-
/* Ivy Bridge, Haswell, Broadwell */
static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
{
@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
pci_read_config_dword(pdev, 0x84, &capid0);
if (capid0 & 0x10)
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();
}
/* Skylake */
@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
* enabled, so memory machine check recovery is also enabled.
*/
if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
#endif
-#endif
bool x86_apple_machine;
EXPORT_SYMBOL(x86_apple_machine);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a515e2d230b7..db115943e8bd 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -10,7 +10,7 @@
#include <linux/sched.h>
#include <linux/tboot.h>
#include <linux/delay.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
#include <linux/pgtable.h>
#include <acpi/reboot.h>
#include <asm/io.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3511736fbc74..84f581c91db4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -7,6 +7,7 @@
*/
#include <linux/console.h>
#include <linux/crash_dump.h>
+#include <linux/dma-map-ops.h>
#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/init_ohci1394_dma.h>
@@ -19,6 +20,8 @@
#include <linux/hugetlb.h>
#include <linux/tboot.h>
#include <linux/usb/xhci-dbgp.h>
+#include <linux/static_call.h>
+#include <linux/swiotlb.h>
#include <uapi/linux/mount.h>
@@ -263,16 +266,12 @@ static void __init relocate_initrd(void)
u64 area_size = PAGE_ALIGN(ramdisk_size);
/* We need to move the initrd down into directly mapped mem */
- relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
- area_size, PAGE_SIZE);
-
+ relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0,
+ PFN_PHYS(max_pfn_mapped));
if (!relocated_ramdisk)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
- /* Note: this includes all the mem currently occupied by
- the initrd, we rely on that fact to keep the data intact. */
- memblock_reserve(relocated_ramdisk, area_size);
initrd_start = relocated_ramdisk + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
@@ -299,13 +298,13 @@ static void __init early_reserve_initrd(void)
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
}
+
static void __init reserve_initrd(void)
{
/* Assume only end is not page aligned */
u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- u64 mapped_size;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -313,12 +312,6 @@ static void __init reserve_initrd(void)
initrd_start = 0;
- mapped_size = memblock_mem_size(max_pfn_mapped);
- if (ramdisk_size >= (mapped_size>>1))
- panic("initrd too large to handle, "
- "disabling initrd (%lld needed, %lld available)\n",
- ramdisk_size, mapped_size>>1);
-
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1);
@@ -430,13 +423,13 @@ static int __init reserve_crashkernel_low(void)
{
#ifdef CONFIG_X86_64
unsigned long long base, low_base = 0, low_size = 0;
- unsigned long total_low_mem;
+ unsigned long low_mem_limit;
int ret;
- total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
+ low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX);
/* crashkernel=Y,low */
- ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
+ ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base);
if (ret) {
/*
* two parts from kernel/dma/swiotlb.c:
@@ -454,23 +447,17 @@ static int __init reserve_crashkernel_low(void)
return 0;
}
- low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN);
+ low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
(unsigned long)(low_size >> 20));
return -ENOMEM;
}
- ret = memblock_reserve(low_base, low_size);
- if (ret) {
- pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
- return ret;
- }
-
- pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
+ pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n",
(unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20),
- (unsigned long)(total_low_mem >> 20));
+ (unsigned long)(low_mem_limit >> 20));
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
@@ -514,13 +501,13 @@ static void __init reserve_crashkernel(void)
* unless "crashkernel=size[KMG],high" is specified.
*/
if (!high)
- crash_base = memblock_find_in_range(CRASH_ALIGN,
- CRASH_ADDR_LOW_MAX,
- crash_size, CRASH_ALIGN);
+ crash_base = memblock_phys_alloc_range(crash_size,
+ CRASH_ALIGN, CRASH_ALIGN,
+ CRASH_ADDR_LOW_MAX);
if (!crash_base)
- crash_base = memblock_find_in_range(CRASH_ALIGN,
- CRASH_ADDR_HIGH_MAX,
- crash_size, CRASH_ALIGN);
+ crash_base = memblock_phys_alloc_range(crash_size,
+ CRASH_ALIGN, CRASH_ALIGN,
+ CRASH_ADDR_HIGH_MAX);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
@@ -528,19 +515,13 @@ static void __init reserve_crashkernel(void)
} else {
unsigned long long start;
- start = memblock_find_in_range(crash_base,
- crash_base + crash_size,
- crash_size, 1 << 20);
+ start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base,
+ crash_base + crash_size);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
- ret = memblock_reserve(crash_base, crash_size);
- if (ret) {
- pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
- return;
- }
if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
memblock_free(crash_base, crash_size);
@@ -849,6 +830,7 @@ void __init setup_arch(char **cmdline_p)
early_cpu_init();
arch_init_ideal_nops();
jump_label_init();
+ static_call_init();
early_ioremap_init();
setup_olpc_ofw_pgd();
@@ -1077,6 +1059,7 @@ void __init setup_arch(char **cmdline_p)
efi_fake_memmap();
efi_find_mirror();
efi_esrt_init();
+ efi_mokvar_table_init();
/*
* The EFI specification says that boot service code won't be
@@ -1218,6 +1201,7 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
init_cpu_to_node();
+ init_gi_nodes();
io_apic_init_mappings();
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
new file mode 100644
index 000000000000..5f83ccaab877
--- /dev/null
+++ b/arch/x86/kernel/sev-es-shared.c
@@ -0,0 +1,507 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ *
+ * This file is not compiled stand-alone. It contains code shared
+ * between the pre-decompression boot code and the running Linux kernel
+ * and is included directly into both code-bases.
+ */
+
+#ifndef __BOOT_COMPRESSED
+#define error(v) pr_err(v)
+#define has_cpuflag(f) boot_cpu_has(f)
+#endif
+
+static bool __init sev_es_check_cpu_features(void)
+{
+ if (!has_cpuflag(X86_FEATURE_RDRAND)) {
+ error("RDRAND instruction not supported - no trusted source of randomness available\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void sev_es_terminate(unsigned int reason)
+{
+ u64 val = GHCB_SEV_TERMINATE;
+
+ /*
+ * Tell the hypervisor what went wrong - only reason-set 0 is
+ * currently supported.
+ */
+ val |= GHCB_SEV_TERMINATE_REASON(0, reason);
+
+ /* Request Guest Termination from Hypvervisor */
+ sev_es_wr_ghcb_msr(val);
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
+static bool sev_es_negotiate_protocol(void)
+{
+ u64 val;
+
+ /* Do the GHCB protocol version negotiation */
+ sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+
+ if (GHCB_INFO(val) != GHCB_SEV_INFO)
+ return false;
+
+ if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+ GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
+ return false;
+
+ return true;
+}
+
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
+{
+ memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static bool vc_decoding_needed(unsigned long exit_code)
+{
+ /* Exceptions don't require to decode the instruction */
+ return !(exit_code >= SVM_EXIT_EXCP_BASE &&
+ exit_code <= SVM_EXIT_LAST_EXCP);
+}
+
+static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
+ struct pt_regs *regs,
+ unsigned long exit_code)
+{
+ enum es_result ret = ES_OK;
+
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->regs = regs;
+
+ if (vc_decoding_needed(exit_code))
+ ret = vc_decode_insn(ctxt);
+
+ return ret;
+}
+
+static void vc_finish_insn(struct es_em_ctxt *ctxt)
+{
+ ctxt->regs->ip += ctxt->insn.length;
+}
+
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
+{
+ enum es_result ret;
+
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, exit_code);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
+ u64 info = ghcb->save.sw_exit_info_2;
+ unsigned long v;
+
+ info = ghcb->save.sw_exit_info_2;
+ v = info & SVM_EVTINJ_VEC_MASK;
+
+ /* Check if exception information from hypervisor is sane. */
+ if ((info & SVM_EVTINJ_VALID) &&
+ ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
+ ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
+ ctxt->fi.vector = v;
+ if (info & SVM_EVTINJ_VALID_ERR)
+ ctxt->fi.error_code = info >> 32;
+ ret = ES_EXCEPTION;
+ } else {
+ ret = ES_VMM_ERROR;
+ }
+ } else {
+ ret = ES_OK;
+ }
+
+ return ret;
+}
+
+/*
+ * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
+ * page yet, so it only supports the MSR based communication with the
+ * hypervisor and only the CPUID exit-code.
+ */
+void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+{
+ unsigned int fn = lower_bits(regs->ax, 32);
+ unsigned long val;
+
+ /* Only CPUID is supported via MSR protocol */
+ if (exit_code != SVM_EXIT_CPUID)
+ goto fail;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->ax = val >> 32;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->bx = val >> 32;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->cx = val >> 32;
+
+ sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
+ VMGEXIT();
+ val = sev_es_rd_ghcb_msr();
+ if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+ goto fail;
+ regs->dx = val >> 32;
+
+ /* Skip over the CPUID two-byte opcode */
+ regs->ip += 2;
+
+ return;
+
+fail:
+ sev_es_wr_ghcb_msr(GHCB_SEV_TERMINATE);
+ VMGEXIT();
+
+ /* Shouldn't get here - if we do halt the machine */
+ while (true)
+ asm volatile("hlt\n");
+}
+
+static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
+ void *src, char *buf,
+ unsigned int data_size,
+ unsigned int count,
+ bool backwards)
+{
+ int i, b = backwards ? -1 : 1;
+ enum es_result ret = ES_OK;
+
+ for (i = 0; i < count; i++) {
+ void *s = src + (i * data_size * b);
+ char *d = buf + (i * data_size);
+
+ ret = vc_read_mem(ctxt, s, d, data_size);
+ if (ret != ES_OK)
+ break;
+ }
+
+ return ret;
+}
+
+static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
+ void *dst, char *buf,
+ unsigned int data_size,
+ unsigned int count,
+ bool backwards)
+{
+ int i, s = backwards ? -1 : 1;
+ enum es_result ret = ES_OK;
+
+ for (i = 0; i < count; i++) {
+ void *d = dst + (i * data_size * s);
+ char *b = buf + (i * data_size);
+
+ ret = vc_write_mem(ctxt, d, b, data_size);
+ if (ret != ES_OK)
+ break;
+ }
+
+ return ret;
+}
+
+#define IOIO_TYPE_STR BIT(2)
+#define IOIO_TYPE_IN 1
+#define IOIO_TYPE_INS (IOIO_TYPE_IN | IOIO_TYPE_STR)
+#define IOIO_TYPE_OUT 0
+#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
+
+#define IOIO_REP BIT(3)
+
+#define IOIO_ADDR_64 BIT(9)
+#define IOIO_ADDR_32 BIT(8)
+#define IOIO_ADDR_16 BIT(7)
+
+#define IOIO_DATA_32 BIT(6)
+#define IOIO_DATA_16 BIT(5)
+#define IOIO_DATA_8 BIT(4)
+
+#define IOIO_SEG_ES (0 << 10)
+#define IOIO_SEG_DS (3 << 10)
+
+static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
+{
+ struct insn *insn = &ctxt->insn;
+ *exitinfo = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ /* INS opcodes */
+ case 0x6c:
+ case 0x6d:
+ *exitinfo |= IOIO_TYPE_INS;
+ *exitinfo |= IOIO_SEG_ES;
+ *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ break;
+
+ /* OUTS opcodes */
+ case 0x6e:
+ case 0x6f:
+ *exitinfo |= IOIO_TYPE_OUTS;
+ *exitinfo |= IOIO_SEG_DS;
+ *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ break;
+
+ /* IN immediate opcodes */
+ case 0xe4:
+ case 0xe5:
+ *exitinfo |= IOIO_TYPE_IN;
+ *exitinfo |= (u64)insn->immediate.value << 16;
+ break;
+
+ /* OUT immediate opcodes */
+ case 0xe6:
+ case 0xe7:
+ *exitinfo |= IOIO_TYPE_OUT;
+ *exitinfo |= (u64)insn->immediate.value << 16;
+ break;
+
+ /* IN register opcodes */
+ case 0xec:
+ case 0xed:
+ *exitinfo |= IOIO_TYPE_IN;
+ *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ break;
+
+ /* OUT register opcodes */
+ case 0xee:
+ case 0xef:
+ *exitinfo |= IOIO_TYPE_OUT;
+ *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+ break;
+
+ default:
+ return ES_DECODE_FAILED;
+ }
+
+ switch (insn->opcode.bytes[0]) {
+ case 0x6c:
+ case 0x6e:
+ case 0xe4:
+ case 0xe6:
+ case 0xec:
+ case 0xee:
+ /* Single byte opcodes */
+ *exitinfo |= IOIO_DATA_8;
+ break;
+ default:
+ /* Length determined by instruction parsing */
+ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
+ : IOIO_DATA_32;
+ }
+ switch (insn->addr_bytes) {
+ case 2:
+ *exitinfo |= IOIO_ADDR_16;
+ break;
+ case 4:
+ *exitinfo |= IOIO_ADDR_32;
+ break;
+ case 8:
+ *exitinfo |= IOIO_ADDR_64;
+ break;
+ }
+
+ if (insn_has_rep_prefix(insn))
+ *exitinfo |= IOIO_REP;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ u64 exit_info_1, exit_info_2;
+ enum es_result ret;
+
+ ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
+ if (ret != ES_OK)
+ return ret;
+
+ if (exit_info_1 & IOIO_TYPE_STR) {
+
+ /* (REP) INS/OUTS */
+
+ bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
+ unsigned int io_bytes, exit_bytes;
+ unsigned int ghcb_count, op_count;
+ unsigned long es_base;
+ u64 sw_scratch;
+
+ /*
+ * For the string variants with rep prefix the amount of in/out
+ * operations per #VC exception is limited so that the kernel
+ * has a chance to take interrupts and re-schedule while the
+ * instruction is emulated.
+ */
+ io_bytes = (exit_info_1 >> 4) & 0x7;
+ ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
+
+ op_count = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
+ exit_info_2 = min(op_count, ghcb_count);
+ exit_bytes = exit_info_2 * io_bytes;
+
+ es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+ /* Read bytes of OUTS into the shared buffer */
+ if (!(exit_info_1 & IOIO_TYPE_IN)) {
+ ret = vc_insn_string_read(ctxt,
+ (void *)(es_base + regs->si),
+ ghcb->shared_buffer, io_bytes,
+ exit_info_2, df);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Issue an VMGEXIT to the HV to consume the bytes from the
+ * shared buffer or to have it write them into the shared buffer
+ * depending on the instruction: OUTS or INS.
+ */
+ sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
+ ghcb_set_sw_scratch(ghcb, sw_scratch);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
+ exit_info_1, exit_info_2);
+ if (ret != ES_OK)
+ return ret;
+
+ /* Read bytes from shared buffer into the guest's destination. */
+ if (exit_info_1 & IOIO_TYPE_IN) {
+ ret = vc_insn_string_write(ctxt,
+ (void *)(es_base + regs->di),
+ ghcb->shared_buffer, io_bytes,
+ exit_info_2, df);
+ if (ret)
+ return ret;
+
+ if (df)
+ regs->di -= exit_bytes;
+ else
+ regs->di += exit_bytes;
+ } else {
+ if (df)
+ regs->si -= exit_bytes;
+ else
+ regs->si += exit_bytes;
+ }
+
+ if (exit_info_1 & IOIO_REP)
+ regs->cx -= exit_info_2;
+
+ ret = regs->cx ? ES_RETRY : ES_OK;
+
+ } else {
+
+ /* IN/OUT into/from rAX */
+
+ int bits = (exit_info_1 & 0x70) >> 1;
+ u64 rax = 0;
+
+ if (!(exit_info_1 & IOIO_TYPE_IN))
+ rax = lower_bits(regs->ax, bits);
+
+ ghcb_set_rax(ghcb, rax);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (exit_info_1 & IOIO_TYPE_IN) {
+ if (!ghcb_rax_is_valid(ghcb))
+ return ES_VMM_ERROR;
+ regs->ax = lower_bits(ghcb->save.rax, bits);
+ }
+ }
+
+ return ret;
+}
+
+static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ u32 cr4 = native_read_cr4();
+ enum es_result ret;
+
+ ghcb_set_rax(ghcb, regs->ax);
+ ghcb_set_rcx(ghcb, regs->cx);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #GP - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ regs->ax = ghcb->save.rax;
+ regs->bx = ghcb->save.rbx;
+ regs->cx = ghcb->save.rcx;
+ regs->dx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ unsigned long exit_code)
+{
+ bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
+ enum es_result ret;
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
+ (!rdtscp || ghcb_rcx_is_valid(ghcb))))
+ return ES_VMM_ERROR;
+
+ ctxt->regs->ax = ghcb->save.rax;
+ ctxt->regs->dx = ghcb->save.rdx;
+ if (rdtscp)
+ ctxt->regs->cx = ghcb->save.rcx;
+
+ return ES_OK;
+}
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
new file mode 100644
index 000000000000..4a96726fbaf8
--- /dev/null
+++ b/arch/x86/kernel/sev-es.c
@@ -0,0 +1,1404 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#define pr_fmt(fmt) "SEV-ES: " fmt
+
+#include <linux/sched/debug.h> /* For show_regs() */
+#include <linux/percpu-defs.h>
+#include <linux/mem_encrypt.h>
+#include <linux/lockdep.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/stacktrace.h>
+#include <asm/sev-es.h>
+#include <asm/insn-eval.h>
+#include <asm/fpu/internal.h>
+#include <asm/processor.h>
+#include <asm/realmode.h>
+#include <asm/traps.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+
+#define DR7_RESET_VALUE 0x400
+
+/* For early boot hypervisor communication in SEV-ES enabled guests */
+static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
+
+/*
+ * Needs to be in the .data section because we need it NULL before bss is
+ * cleared
+ */
+static struct ghcb __initdata *boot_ghcb;
+
+/* #VC handler runtime per-CPU data */
+struct sev_es_runtime_data {
+ struct ghcb ghcb_page;
+
+ /* Physical storage for the per-CPU IST stack of the #VC handler */
+ char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+
+ /*
+ * Physical storage for the per-CPU fall-back stack of the #VC handler.
+ * The fall-back stack is used when it is not safe to switch back to the
+ * interrupted stack in the #VC entry code.
+ */
+ char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+
+ /*
+ * Reserve one page per CPU as backup storage for the unencrypted GHCB.
+ * It is needed when an NMI happens while the #VC handler uses the real
+ * GHCB, and the NMI handler itself is causing another #VC exception. In
+ * that case the GHCB content of the first handler needs to be backed up
+ * and restored.
+ */
+ struct ghcb backup_ghcb;
+
+ /*
+ * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
+ * There is no need for it to be atomic, because nothing is written to
+ * the GHCB between the read and the write of ghcb_active. So it is safe
+ * to use it when a nested #VC exception happens before the write.
+ *
+ * This is necessary for example in the #VC->NMI->#VC case when the NMI
+ * happens while the first #VC handler uses the GHCB. When the NMI code
+ * raises a second #VC handler it might overwrite the contents of the
+ * GHCB written by the first handler. To avoid this the content of the
+ * GHCB is saved and restored when the GHCB is detected to be in use
+ * already.
+ */
+ bool ghcb_active;
+ bool backup_ghcb_active;
+
+ /*
+ * Cached DR7 value - write it on DR7 writes and return it on reads.
+ * That value will never make it to the real hardware DR7 as debugging
+ * is currently unsupported in SEV-ES guests.
+ */
+ unsigned long dr7;
+};
+
+struct ghcb_state {
+ struct ghcb *ghcb;
+};
+
+static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
+DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+
+/* Needed in vc_early_forward_exception */
+void do_early_exception(struct pt_regs *regs, int trapnr);
+
+static void __init setup_vc_stacks(int cpu)
+{
+ struct sev_es_runtime_data *data;
+ struct cpu_entry_area *cea;
+ unsigned long vaddr;
+ phys_addr_t pa;
+
+ data = per_cpu(runtime_data, cpu);
+ cea = get_cpu_entry_area(cpu);
+
+ /* Map #VC IST stack */
+ vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
+ pa = __pa(data->ist_stack);
+ cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+
+ /* Map VC fall-back stack */
+ vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
+ pa = __pa(data->fallback_stack);
+ cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+}
+
+static __always_inline bool on_vc_stack(unsigned long sp)
+{
+ return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
+}
+
+/*
+ * This function handles the case when an NMI is raised in the #VC exception
+ * handler entry code. In this case, the IST entry for #VC must be adjusted, so
+ * that any subsequent #VC exception will not overwrite the stack contents of the
+ * interrupted #VC handler.
+ *
+ * The IST entry is adjusted unconditionally so that it can be also be
+ * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
+ * sev_es_ist_exit() call may adjust back the IST entry too early.
+ */
+void noinstr __sev_es_ist_enter(struct pt_regs *regs)
+{
+ unsigned long old_ist, new_ist;
+
+ /* Read old IST entry */
+ old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+ /* Make room on the IST stack */
+ if (on_vc_stack(regs->sp))
+ new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
+ else
+ new_ist = old_ist - sizeof(old_ist);
+
+ /* Store old IST entry */
+ *(unsigned long *)new_ist = old_ist;
+
+ /* Set new IST entry */
+ this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
+}
+
+void noinstr __sev_es_ist_exit(void)
+{
+ unsigned long ist;
+
+ /* Read IST entry */
+ ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+ if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
+ return;
+
+ /* Read back old IST entry and write it to the TSS */
+ this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
+}
+
+static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (unlikely(data->ghcb_active)) {
+ /* GHCB is already in use - save its contents */
+
+ if (unlikely(data->backup_ghcb_active))
+ return NULL;
+
+ /* Mark backup_ghcb active before writing to it */
+ data->backup_ghcb_active = true;
+
+ state->ghcb = &data->backup_ghcb;
+
+ /* Backup GHCB content */
+ *state->ghcb = *ghcb;
+ } else {
+ state->ghcb = NULL;
+ data->ghcb_active = true;
+ }
+
+ return ghcb;
+}
+
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ if (state->ghcb) {
+ /* Restore GHCB from Backup */
+ *ghcb = *state->ghcb;
+ data->backup_ghcb_active = false;
+ state->ghcb = NULL;
+ } else {
+ data->ghcb_active = false;
+ }
+}
+
+/* Needed in vc_early_forward_exception */
+void do_early_exception(struct pt_regs *regs, int trapnr);
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+ return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static inline void sev_es_wr_ghcb_msr(u64 val)
+{
+ u32 low, high;
+
+ low = (u32)(val);
+ high = (u32)(val >> 32);
+
+ native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
+}
+
+static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
+ unsigned char *buffer)
+{
+ return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ enum es_result ret;
+ int res;
+
+ if (user_mode(ctxt->regs)) {
+ res = insn_fetch_from_user(ctxt->regs, buffer);
+ if (!res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
+ }
+
+ if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
+ return ES_DECODE_FAILED;
+ } else {
+ res = vc_fetch_insn_kernel(ctxt, buffer);
+ if (res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
+ }
+
+ insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE - res, 1);
+ insn_get_length(&ctxt->insn);
+ }
+
+ ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+
+ return ret;
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+ char *dst, char *buf, size_t size)
+{
+ unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+ char __user *target = (char __user *)dst;
+ u64 d8;
+ u32 d4;
+ u16 d2;
+ u8 d1;
+
+ switch (size) {
+ case 1:
+ memcpy(&d1, buf, 1);
+ if (put_user(d1, target))
+ goto fault;
+ break;
+ case 2:
+ memcpy(&d2, buf, 2);
+ if (put_user(d2, target))
+ goto fault;
+ break;
+ case 4:
+ memcpy(&d4, buf, 4);
+ if (put_user(d4, target))
+ goto fault;
+ break;
+ case 8:
+ memcpy(&d8, buf, 8);
+ if (put_user(d8, target))
+ goto fault;
+ break;
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return ES_UNSUPPORTED;
+ }
+
+ return ES_OK;
+
+fault:
+ if (user_mode(ctxt->regs))
+ error_code |= X86_PF_USER;
+
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = error_code;
+ ctxt->fi.cr2 = (unsigned long)dst;
+
+ return ES_EXCEPTION;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+ char *src, char *buf, size_t size)
+{
+ unsigned long error_code = X86_PF_PROT;
+ char __user *s = (char __user *)src;
+ u64 d8;
+ u32 d4;
+ u16 d2;
+ u8 d1;
+
+ switch (size) {
+ case 1:
+ if (get_user(d1, s))
+ goto fault;
+ memcpy(buf, &d1, 1);
+ break;
+ case 2:
+ if (get_user(d2, s))
+ goto fault;
+ memcpy(buf, &d2, 2);
+ break;
+ case 4:
+ if (get_user(d4, s))
+ goto fault;
+ memcpy(buf, &d4, 4);
+ break;
+ case 8:
+ if (get_user(d8, s))
+ goto fault;
+ memcpy(buf, &d8, 8);
+ break;
+ default:
+ WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+ return ES_UNSUPPORTED;
+ }
+
+ return ES_OK;
+
+fault:
+ if (user_mode(ctxt->regs))
+ error_code |= X86_PF_USER;
+
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = error_code;
+ ctxt->fi.cr2 = (unsigned long)src;
+
+ return ES_EXCEPTION;
+}
+
+static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ unsigned long vaddr, phys_addr_t *paddr)
+{
+ unsigned long va = (unsigned long)vaddr;
+ unsigned int level;
+ phys_addr_t pa;
+ pgd_t *pgd;
+ pte_t *pte;
+
+ pgd = __va(read_cr3_pa());
+ pgd = &pgd[pgd_index(va)];
+ pte = lookup_address_in_pgd(pgd, va, &level);
+ if (!pte) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.cr2 = vaddr;
+ ctxt->fi.error_code = 0;
+
+ if (user_mode(ctxt->regs))
+ ctxt->fi.error_code |= X86_PF_USER;
+
+ return false;
+ }
+
+ pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
+ pa |= va & ~page_level_mask(level);
+
+ *paddr = pa;
+
+ return true;
+}
+
+/* Include code shared with pre-decompression boot stage */
+#include "sev-es-shared.c"
+
+void noinstr __sev_es_nmi_complete(void)
+{
+ struct ghcb_state state;
+ struct ghcb *ghcb;
+
+ ghcb = sev_es_get_ghcb(&state);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
+ VMGEXIT();
+
+ sev_es_put_ghcb(&state);
+}
+
+static u64 get_jump_table_addr(void)
+{
+ struct ghcb_state state;
+ unsigned long flags;
+ struct ghcb *ghcb;
+ u64 ret = 0;
+
+ local_irq_save(flags);
+
+ ghcb = sev_es_get_ghcb(&state);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
+ ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
+ ghcb_sw_exit_info_2_is_valid(ghcb))
+ ret = ghcb->save.sw_exit_info_2;
+
+ sev_es_put_ghcb(&state);
+
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
+{
+ u16 startup_cs, startup_ip;
+ phys_addr_t jump_table_pa;
+ u64 jump_table_addr;
+ u16 __iomem *jump_table;
+
+ jump_table_addr = get_jump_table_addr();
+
+ /* On UP guests there is no jump table so this is not a failure */
+ if (!jump_table_addr)
+ return 0;
+
+ /* Check if AP Jump Table is page-aligned */
+ if (jump_table_addr & ~PAGE_MASK)
+ return -EINVAL;
+
+ jump_table_pa = jump_table_addr & PAGE_MASK;
+
+ startup_cs = (u16)(rmh->trampoline_start >> 4);
+ startup_ip = (u16)(rmh->sev_es_trampoline_start -
+ rmh->trampoline_start);
+
+ jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
+ if (!jump_table)
+ return -EIO;
+
+ writew(startup_ip, &jump_table[0]);
+ writew(startup_cs, &jump_table[1]);
+
+ iounmap(jump_table);
+
+ return 0;
+}
+
+/*
+ * This is needed by the OVMF UEFI firmware which will use whatever it finds in
+ * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
+ * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
+ */
+int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
+{
+ struct sev_es_runtime_data *data;
+ unsigned long address, pflags;
+ int cpu;
+ u64 pfn;
+
+ if (!sev_es_active())
+ return 0;
+
+ pflags = _PAGE_NX | _PAGE_RW;
+
+ for_each_possible_cpu(cpu) {
+ data = per_cpu(runtime_data, cpu);
+
+ address = __pa(&data->ghcb_page);
+ pfn = address >> PAGE_SHIFT;
+
+ if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
+ return 1;
+ }
+
+ return 0;
+}
+
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ struct pt_regs *regs = ctxt->regs;
+ enum es_result ret;
+ u64 exit_info_1;
+
+ /* Is it a WRMSR? */
+ exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
+
+ ghcb_set_rcx(ghcb, regs->cx);
+ if (exit_info_1) {
+ ghcb_set_rax(ghcb, regs->ax);
+ ghcb_set_rdx(ghcb, regs->dx);
+ }
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
+
+ if ((ret == ES_OK) && (!exit_info_1)) {
+ regs->ax = ghcb->save.rax;
+ regs->dx = ghcb->save.rdx;
+ }
+
+ return ret;
+}
+
+/*
+ * This function runs on the first #VC exception after the kernel
+ * switched to virtual addresses.
+ */
+static bool __init sev_es_setup_ghcb(void)
+{
+ /* First make sure the hypervisor talks a supported protocol. */
+ if (!sev_es_negotiate_protocol())
+ return false;
+
+ /*
+ * Clear the boot_ghcb. The first exception comes in before the bss
+ * section is cleared.
+ */
+ memset(&boot_ghcb_page, 0, PAGE_SIZE);
+
+ /* Alright - Make the boot-ghcb public */
+ boot_ghcb = &boot_ghcb_page;
+
+ return true;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void sev_es_ap_hlt_loop(void)
+{
+ struct ghcb_state state;
+ struct ghcb *ghcb;
+
+ ghcb = sev_es_get_ghcb(&state);
+
+ while (true) {
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
+ ghcb_set_sw_exit_info_1(ghcb, 0);
+ ghcb_set_sw_exit_info_2(ghcb, 0);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ /* Wakeup signal? */
+ if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
+ ghcb->save.sw_exit_info_2)
+ break;
+ }
+
+ sev_es_put_ghcb(&state);
+}
+
+/*
+ * Play_dead handler when running under SEV-ES. This is needed because
+ * the hypervisor can't deliver an SIPI request to restart the AP.
+ * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
+ * hypervisor wakes it up again.
+ */
+static void sev_es_play_dead(void)
+{
+ play_dead_common();
+
+ /* IRQs now disabled */
+
+ sev_es_ap_hlt_loop();
+
+ /*
+ * If we get here, the VCPU was woken up again. Jump to CPU
+ * startup code to get it back online.
+ */
+ start_cpu0();
+}
+#else /* CONFIG_HOTPLUG_CPU */
+#define sev_es_play_dead native_play_dead
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+static void __init sev_es_setup_play_dead(void)
+{
+ smp_ops.play_dead = sev_es_play_dead;
+}
+#else
+static inline void sev_es_setup_play_dead(void) { }
+#endif
+
+static void __init alloc_runtime_data(int cpu)
+{
+ struct sev_es_runtime_data *data;
+
+ data = memblock_alloc(sizeof(*data), PAGE_SIZE);
+ if (!data)
+ panic("Can't allocate SEV-ES runtime data");
+
+ per_cpu(runtime_data, cpu) = data;
+}
+
+static void __init init_ghcb(int cpu)
+{
+ struct sev_es_runtime_data *data;
+ int err;
+
+ data = per_cpu(runtime_data, cpu);
+
+ err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
+ sizeof(data->ghcb_page));
+ if (err)
+ panic("Can't map GHCBs unencrypted");
+
+ memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
+
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+}
+
+void __init sev_es_init_vc_handling(void)
+{
+ int cpu;
+
+ BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
+
+ if (!sev_es_active())
+ return;
+
+ if (!sev_es_check_cpu_features())
+ panic("SEV-ES CPU Features missing");
+
+ /* Enable SEV-ES special handling */
+ static_branch_enable(&sev_es_enable_key);
+
+ /* Initialize per-cpu GHCB pages */
+ for_each_possible_cpu(cpu) {
+ alloc_runtime_data(cpu);
+ init_ghcb(cpu);
+ setup_vc_stacks(cpu);
+ }
+
+ sev_es_setup_play_dead();
+
+ /* Secondary CPUs use the runtime #VC handler */
+ initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
+}
+
+static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
+{
+ int trapnr = ctxt->fi.vector;
+
+ if (trapnr == X86_TRAP_PF)
+ native_write_cr2(ctxt->fi.cr2);
+
+ ctxt->regs->orig_ax = ctxt->fi.error_code;
+ do_early_exception(ctxt->regs, trapnr);
+}
+
+static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
+{
+ long *reg_array;
+ int offset;
+
+ reg_array = (long *)ctxt->regs;
+ offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
+
+ if (offset < 0)
+ return NULL;
+
+ offset /= sizeof(long);
+
+ return reg_array + offset;
+}
+
+static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
+{
+ long *reg_array;
+ int offset;
+
+ reg_array = (long *)ctxt->regs;
+ offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
+
+ if (offset < 0)
+ return NULL;
+
+ offset /= sizeof(long);
+
+ return reg_array + offset;
+}
+static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ unsigned int bytes, bool read)
+{
+ u64 exit_code, exit_info_1, exit_info_2;
+ unsigned long ghcb_pa = __pa(ghcb);
+ phys_addr_t paddr;
+ void __user *ref;
+
+ ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
+ if (ref == (void __user *)-1L)
+ return ES_UNSUPPORTED;
+
+ exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
+
+ if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) {
+ if (!read)
+ ctxt->fi.error_code |= X86_PF_WRITE;
+
+ return ES_EXCEPTION;
+ }
+
+ exit_info_1 = paddr;
+ /* Can never be greater than 8 */
+ exit_info_2 = bytes;
+
+ ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
+
+ return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
+}
+
+static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct insn *insn = &ctxt->insn;
+ unsigned int bytes = 0;
+ enum es_result ret;
+ int sign_byte;
+ long *reg_data;
+
+ switch (insn->opcode.bytes[1]) {
+ /* MMIO Read w/ zero-extension */
+ case 0xb6:
+ bytes = 1;
+ fallthrough;
+ case 0xb7:
+ if (!bytes)
+ bytes = 2;
+
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ /* Zero extend based on operand size */
+ reg_data = vc_insn_get_reg(ctxt);
+ if (!reg_data)
+ return ES_DECODE_FAILED;
+
+ memset(reg_data, 0, insn->opnd_bytes);
+
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+
+ /* MMIO Read w/ sign-extension */
+ case 0xbe:
+ bytes = 1;
+ fallthrough;
+ case 0xbf:
+ if (!bytes)
+ bytes = 2;
+
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ /* Sign extend based on operand size */
+ reg_data = vc_insn_get_reg(ctxt);
+ if (!reg_data)
+ return ES_DECODE_FAILED;
+
+ if (bytes == 1) {
+ u8 *val = (u8 *)ghcb->shared_buffer;
+
+ sign_byte = (*val & 0x80) ? 0xff : 0x00;
+ } else {
+ u16 *val = (u16 *)ghcb->shared_buffer;
+
+ sign_byte = (*val & 0x8000) ? 0xff : 0x00;
+ }
+ memset(reg_data, sign_byte, insn->opnd_bytes);
+
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+
+ default:
+ ret = ES_UNSUPPORTED;
+ }
+
+ return ret;
+}
+
+/*
+ * The MOVS instruction has two memory operands, which raises the
+ * problem that it is not known whether the access to the source or the
+ * destination caused the #VC exception (and hence whether an MMIO read
+ * or write operation needs to be emulated).
+ *
+ * Instead of playing games with walking page-tables and trying to guess
+ * whether the source or destination is an MMIO range, split the move
+ * into two operations, a read and a write with only one memory operand.
+ * This will cause a nested #VC exception on the MMIO address which can
+ * then be handled.
+ *
+ * This implementation has the benefit that it also supports MOVS where
+ * source _and_ destination are MMIO regions.
+ *
+ * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
+ * rare operation. If it turns out to be a performance problem the split
+ * operations can be moved to memcpy_fromio() and memcpy_toio().
+ */
+static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
+ unsigned int bytes)
+{
+ unsigned long ds_base, es_base;
+ unsigned char *src, *dst;
+ unsigned char buffer[8];
+ enum es_result ret;
+ bool rep;
+ int off;
+
+ ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
+ es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+ if (ds_base == -1L || es_base == -1L) {
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+ return ES_EXCEPTION;
+ }
+
+ src = ds_base + (unsigned char *)ctxt->regs->si;
+ dst = es_base + (unsigned char *)ctxt->regs->di;
+
+ ret = vc_read_mem(ctxt, src, buffer, bytes);
+ if (ret != ES_OK)
+ return ret;
+
+ ret = vc_write_mem(ctxt, dst, buffer, bytes);
+ if (ret != ES_OK)
+ return ret;
+
+ if (ctxt->regs->flags & X86_EFLAGS_DF)
+ off = -bytes;
+ else
+ off = bytes;
+
+ ctxt->regs->si += off;
+ ctxt->regs->di += off;
+
+ rep = insn_has_rep_prefix(&ctxt->insn);
+ if (rep)
+ ctxt->regs->cx -= 1;
+
+ if (!rep || ctxt->regs->cx == 0)
+ return ES_OK;
+ else
+ return ES_RETRY;
+}
+
+static enum es_result vc_handle_mmio(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct insn *insn = &ctxt->insn;
+ unsigned int bytes = 0;
+ enum es_result ret;
+ long *reg_data;
+
+ switch (insn->opcode.bytes[0]) {
+ /* MMIO Write */
+ case 0x88:
+ bytes = 1;
+ fallthrough;
+ case 0x89:
+ if (!bytes)
+ bytes = insn->opnd_bytes;
+
+ reg_data = vc_insn_get_reg(ctxt);
+ if (!reg_data)
+ return ES_DECODE_FAILED;
+
+ memcpy(ghcb->shared_buffer, reg_data, bytes);
+
+ ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+ break;
+
+ case 0xc6:
+ bytes = 1;
+ fallthrough;
+ case 0xc7:
+ if (!bytes)
+ bytes = insn->opnd_bytes;
+
+ memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
+
+ ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+ break;
+
+ /* MMIO Read */
+ case 0x8a:
+ bytes = 1;
+ fallthrough;
+ case 0x8b:
+ if (!bytes)
+ bytes = insn->opnd_bytes;
+
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ reg_data = vc_insn_get_reg(ctxt);
+ if (!reg_data)
+ return ES_DECODE_FAILED;
+
+ /* Zero-extend for 32-bit operation */
+ if (bytes == 4)
+ *reg_data = 0;
+
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+
+ /* MOVS instruction */
+ case 0xa4:
+ bytes = 1;
+ fallthrough;
+ case 0xa5:
+ if (!bytes)
+ bytes = insn->opnd_bytes;
+
+ ret = vc_handle_mmio_movs(ctxt, bytes);
+ break;
+ /* Two-Byte Opcodes */
+ case 0x0f:
+ ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
+ break;
+ default:
+ ret = ES_UNSUPPORTED;
+ }
+
+ return ret;
+}
+
+static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+ long val, *reg = vc_insn_get_rm(ctxt);
+ enum es_result ret;
+
+ if (!reg)
+ return ES_DECODE_FAILED;
+
+ val = *reg;
+
+ /* Upper 32 bits must be written as zeroes */
+ if (val >> 32) {
+ ctxt->fi.vector = X86_TRAP_GP;
+ ctxt->fi.error_code = 0;
+ return ES_EXCEPTION;
+ }
+
+ /* Clear out other reserved bits and set bit 10 */
+ val = (val & 0xffff23ffL) | BIT(10);
+
+ /* Early non-zero writes to DR7 are not supported */
+ if (!data && (val & ~DR7_RESET_VALUE))
+ return ES_UNSUPPORTED;
+
+ /* Using a value of 0 for ExitInfo1 means RAX holds the value */
+ ghcb_set_rax(ghcb, val);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (data)
+ data->dr7 = val;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+ long *reg = vc_insn_get_rm(ctxt);
+
+ if (!reg)
+ return ES_DECODE_FAILED;
+
+ if (data)
+ *reg = data->dr7;
+ else
+ *reg = DR7_RESET_VALUE;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
+}
+
+static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ enum es_result ret;
+
+ ghcb_set_rcx(ghcb, ctxt->regs->cx);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ ctxt->regs->ax = ghcb->save.rax;
+ ctxt->regs->dx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_monitor(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ /*
+ * Treat it as a NOP and do not leak a physical address to the
+ * hypervisor.
+ */
+ return ES_OK;
+}
+
+static enum es_result vc_handle_mwait(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ /* Treat the same as MONITOR/MONITORX */
+ return ES_OK;
+}
+
+static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ enum es_result ret;
+
+ ghcb_set_rax(ghcb, ctxt->regs->ax);
+ ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
+
+ if (x86_platform.hyper.sev_es_hcall_prepare)
+ x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!ghcb_rax_is_valid(ghcb))
+ return ES_VMM_ERROR;
+
+ ctxt->regs->ax = ghcb->save.rax;
+
+ /*
+ * Call sev_es_hcall_finish() after regs->ax is already set.
+ * This allows the hypervisor handler to overwrite it again if
+ * necessary.
+ */
+ if (x86_platform.hyper.sev_es_hcall_finish &&
+ !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
+ return ES_VMM_ERROR;
+
+ return ES_OK;
+}
+
+static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt)
+{
+ /*
+ * Calling ecx_alignment_check() directly does not work, because it
+ * enables IRQs and the GHCB is active. Forward the exception and call
+ * it later from vc_forward_exception().
+ */
+ ctxt->fi.vector = X86_TRAP_AC;
+ ctxt->fi.error_code = 0;
+ return ES_EXCEPTION;
+}
+
+static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ noist_exc_debug(regs);
+ else
+ exc_debug(regs);
+}
+
+static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
+ struct ghcb *ghcb,
+ unsigned long exit_code)
+{
+ enum es_result result;
+
+ switch (exit_code) {
+ case SVM_EXIT_READ_DR7:
+ result = vc_handle_dr7_read(ghcb, ctxt);
+ break;
+ case SVM_EXIT_WRITE_DR7:
+ result = vc_handle_dr7_write(ghcb, ctxt);
+ break;
+ case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
+ result = vc_handle_trap_ac(ghcb, ctxt);
+ break;
+ case SVM_EXIT_RDTSC:
+ case SVM_EXIT_RDTSCP:
+ result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
+ break;
+ case SVM_EXIT_RDPMC:
+ result = vc_handle_rdpmc(ghcb, ctxt);
+ break;
+ case SVM_EXIT_INVD:
+ pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
+ result = ES_UNSUPPORTED;
+ break;
+ case SVM_EXIT_CPUID:
+ result = vc_handle_cpuid(ghcb, ctxt);
+ break;
+ case SVM_EXIT_IOIO:
+ result = vc_handle_ioio(ghcb, ctxt);
+ break;
+ case SVM_EXIT_MSR:
+ result = vc_handle_msr(ghcb, ctxt);
+ break;
+ case SVM_EXIT_VMMCALL:
+ result = vc_handle_vmmcall(ghcb, ctxt);
+ break;
+ case SVM_EXIT_WBINVD:
+ result = vc_handle_wbinvd(ghcb, ctxt);
+ break;
+ case SVM_EXIT_MONITOR:
+ result = vc_handle_monitor(ghcb, ctxt);
+ break;
+ case SVM_EXIT_MWAIT:
+ result = vc_handle_mwait(ghcb, ctxt);
+ break;
+ case SVM_EXIT_NPF:
+ result = vc_handle_mmio(ghcb, ctxt);
+ break;
+ default:
+ /*
+ * Unexpected #VC exception
+ */
+ result = ES_UNSUPPORTED;
+ }
+
+ return result;
+}
+
+static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+ long error_code = ctxt->fi.error_code;
+ int trapnr = ctxt->fi.vector;
+
+ ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+ switch (trapnr) {
+ case X86_TRAP_GP:
+ exc_general_protection(ctxt->regs, error_code);
+ break;
+ case X86_TRAP_UD:
+ exc_invalid_op(ctxt->regs);
+ break;
+ case X86_TRAP_AC:
+ exc_alignment_check(ctxt->regs, error_code);
+ break;
+ default:
+ pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
+ BUG();
+ }
+}
+
+static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
+{
+ unsigned long sp = (unsigned long)regs;
+
+ return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
+}
+
+/*
+ * Main #VC exception handler. It is called when the entry code was able to
+ * switch off the IST to a safe kernel stack.
+ *
+ * With the current implementation it is always possible to switch to a safe
+ * stack because #VC exceptions only happen at known places, like intercepted
+ * instructions or accesses to MMIO areas/IO ports. They can also happen with
+ * code instrumentation when the hypervisor intercepts #DB, but the critical
+ * paths are forbidden to be instrumented, so #DB exceptions currently also
+ * only happen in safe places.
+ */
+DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
+{
+ struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+ struct ghcb_state state;
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+ struct ghcb *ghcb;
+
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+ */
+ if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
+ vc_handle_trap_db(regs);
+ return;
+ }
+
+ instrumentation_begin();
+
+ /*
+ * This is invoked through an interrupt gate, so IRQs are disabled. The
+ * code below might walk page-tables for user or kernel addresses, so
+ * keep the IRQs disabled to protect us against concurrent TLB flushes.
+ */
+
+ ghcb = sev_es_get_ghcb(&state);
+ if (!ghcb) {
+ /*
+ * Mark GHCBs inactive so that panic() is able to print the
+ * message.
+ */
+ data->ghcb_active = false;
+ data->backup_ghcb_active = false;
+
+ panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+ }
+
+ vc_ghcb_invalidate(ghcb);
+ result = vc_init_em_ctxt(&ctxt, regs, error_code);
+
+ if (result == ES_OK)
+ result = vc_handle_exitcode(&ctxt, ghcb, error_code);
+
+ sev_es_put_ghcb(&state);
+
+ /* Done - now check the result */
+ switch (result) {
+ case ES_OK:
+ vc_finish_insn(&ctxt);
+ break;
+ case ES_UNSUPPORTED:
+ pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+ error_code, regs->ip);
+ goto fail;
+ case ES_VMM_ERROR:
+ pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+ error_code, regs->ip);
+ goto fail;
+ case ES_DECODE_FAILED:
+ pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+ error_code, regs->ip);
+ goto fail;
+ case ES_EXCEPTION:
+ vc_forward_exception(&ctxt);
+ break;
+ case ES_RETRY:
+ /* Nothing to do */
+ break;
+ default:
+ pr_emerg("Unknown result in %s():%d\n", __func__, result);
+ /*
+ * Emulating the instruction which caused the #VC exception
+ * failed - can't continue so print debug information
+ */
+ BUG();
+ }
+
+out:
+ instrumentation_end();
+
+ return;
+
+fail:
+ if (user_mode(regs)) {
+ /*
+ * Do not kill the machine if user-space triggered the
+ * exception. Send SIGBUS instead and let user-space deal with
+ * it.
+ */
+ force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+ } else {
+ pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
+ result);
+
+ /* Show some debug info */
+ show_regs(regs);
+
+ /* Ask hypervisor to sev_es_terminate */
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+ /* If that fails and we get here - just panic */
+ panic("Returned from Terminate-Request to Hypervisor\n");
+ }
+
+ goto out;
+}
+
+/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
+DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
+{
+ instrumentation_begin();
+ panic("Can't handle #VC exception from unsupported context\n");
+ instrumentation_end();
+}
+
+DEFINE_IDTENTRY_VC(exc_vmm_communication)
+{
+ if (likely(!on_vc_fallback_stack(regs)))
+ safe_stack_exc_vmm_communication(regs, error_code);
+ else
+ ist_exc_vmm_communication(regs, error_code);
+}
+
+bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
+{
+ unsigned long exit_code = regs->orig_ax;
+ struct es_em_ctxt ctxt;
+ enum es_result result;
+
+ /* Do initial setup or terminate the guest */
+ if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
+ sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+ vc_ghcb_invalidate(boot_ghcb);
+
+ result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+ if (result == ES_OK)
+ result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
+
+ /* Done - now check the result */
+ switch (result) {
+ case ES_OK:
+ vc_finish_insn(&ctxt);
+ break;
+ case ES_UNSUPPORTED:
+ early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+ exit_code, regs->ip);
+ goto fail;
+ case ES_VMM_ERROR:
+ early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+ exit_code, regs->ip);
+ goto fail;
+ case ES_DECODE_FAILED:
+ early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+ exit_code, regs->ip);
+ goto fail;
+ case ES_EXCEPTION:
+ vc_early_forward_exception(&ctxt);
+ break;
+ case ES_RETRY:
+ /* Nothing to do */
+ break;
+ default:
+ BUG();
+ }
+
+ return true;
+
+fail:
+ show_regs(regs);
+
+ while (true)
+ halt();
+}
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 9ccbf0576cd0..a7f3e12cfbdb 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -27,7 +27,7 @@ static inline void signal_compat_build_tests(void)
*/
BUILD_BUG_ON(NSIGILL != 11);
BUILD_BUG_ON(NSIGFPE != 15);
- BUILD_BUG_ON(NSIGSEGV != 7);
+ BUILD_BUG_ON(NSIGSEGV != 9);
BUILD_BUG_ON(NSIGBUS != 5);
BUILD_BUG_ON(NSIGTRAP != 5);
BUILD_BUG_ON(NSIGCHLD != 6);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f5ef689dd62a..de776b2e6046 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -227,7 +227,7 @@ static void notrace start_secondary(void *unused)
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
- load_current_idt();
+ cpu_init_exception_handling();
cpu_init();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 2fd698e28e4d..8627fda8d993 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -18,13 +18,13 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct unwind_state state;
unsigned long addr;
- if (regs && !consume_entry(cookie, regs->ip, false))
+ if (regs && !consume_entry(cookie, regs->ip))
return;
for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
- if (!addr || !consume_entry(cookie, addr, false))
+ if (!addr || !consume_entry(cookie, addr))
break;
}
}
@@ -72,7 +72,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
if (!addr)
return -EINVAL;
- if (!consume_entry(cookie, addr, false))
+ if (!consume_entry(cookie, addr))
return -EINVAL;
}
@@ -114,7 +114,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
{
const void __user *fp = (const void __user *)regs->bp;
- if (!consume_entry(cookie, regs->ip, false))
+ if (!consume_entry(cookie, regs->ip))
return;
while (1) {
@@ -128,7 +128,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
break;
if (!frame.ret_addr)
break;
- if (!consume_entry(cookie, frame.ret_addr, false))
+ if (!consume_entry(cookie, frame.ret_addr))
break;
fp = frame.next_fp;
}
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
new file mode 100644
index 000000000000..ca9a380d9c0b
--- /dev/null
+++ b/arch/x86/kernel/static_call.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/static_call.h>
+#include <linux/memory.h>
+#include <linux/bug.h>
+#include <asm/text-patching.h>
+
+enum insn_type {
+ CALL = 0, /* site call */
+ NOP = 1, /* site cond-call */
+ JMP = 2, /* tramp / site tail-call */
+ RET = 3, /* tramp / site cond-tail-call */
+};
+
+static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+{
+ int size = CALL_INSN_SIZE;
+ const void *code;
+
+ switch (type) {
+ case CALL:
+ code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
+ break;
+
+ case NOP:
+ code = ideal_nops[NOP_ATOMIC5];
+ break;
+
+ case JMP:
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
+ break;
+
+ case RET:
+ code = text_gen_insn(RET_INSN_OPCODE, insn, func);
+ size = RET_INSN_SIZE;
+ break;
+ }
+
+ if (memcmp(insn, code, size) == 0)
+ return;
+
+ if (unlikely(system_state == SYSTEM_BOOTING))
+ return text_poke_early(insn, code, size);
+
+ text_poke_bp(insn, code, size, NULL);
+}
+
+static void __static_call_validate(void *insn, bool tail)
+{
+ u8 opcode = *(u8 *)insn;
+
+ if (tail) {
+ if (opcode == JMP32_INSN_OPCODE ||
+ opcode == RET_INSN_OPCODE)
+ return;
+ } else {
+ if (opcode == CALL_INSN_OPCODE ||
+ !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5))
+ return;
+ }
+
+ /*
+ * If we ever trigger this, our text is corrupt, we'll probably not live long.
+ */
+ WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+}
+
+static inline enum insn_type __sc_insn(bool null, bool tail)
+{
+ /*
+ * Encode the following table without branches:
+ *
+ * tail null insn
+ * -----+-------+------
+ * 0 | 0 | CALL
+ * 0 | 1 | NOP
+ * 1 | 0 | JMP
+ * 1 | 1 | RET
+ */
+ return 2*tail + null;
+}
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+ mutex_lock(&text_mutex);
+
+ if (tramp) {
+ __static_call_validate(tramp, true);
+ __static_call_transform(tramp, __sc_insn(!func, true), func);
+ }
+
+ if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
+ __static_call_validate(site, tail);
+ __static_call_transform(site, __sc_insn(!func, tail), func);
+ }
+
+ mutex_unlock(&text_mutex);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
diff --git a/arch/x86/kernel/sys_ia32.c b/arch/x86/kernel/sys_ia32.c
index 720cde885042..6cf65397d225 100644
--- a/arch/x86/kernel/sys_ia32.c
+++ b/arch/x86/kernel/sys_ia32.c
@@ -251,6 +251,6 @@ COMPAT_SYSCALL_DEFINE5(ia32_clone, unsigned long, clone_flags,
.tls = tls_val,
};
- return _do_fork(&args);
+ return kernel_clone(&args);
}
#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 81a2fb711091..e19df6cde35d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -43,6 +43,7 @@
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
+#include <asm/realmode.h>
#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
@@ -195,7 +196,7 @@ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)
DEFINE_IDTENTRY(exc_divide_error)
{
- do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE,
+ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
FPE_INTDIV, error_get_trap_addr(regs));
}
@@ -673,6 +674,50 @@ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
return regs;
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *regs)
+{
+ unsigned long sp, *stack;
+ struct stack_info info;
+ struct pt_regs *regs_ret;
+
+ /*
+ * In the SYSCALL entry path the RSP value comes from user-space - don't
+ * trust it and switch to the current kernel stack
+ */
+ if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+ regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) {
+ sp = this_cpu_read(cpu_current_top_of_stack);
+ goto sync;
+ }
+
+ /*
+ * From here on the RSP value is trusted. Now check whether entry
+ * happened from a safe stack. Not safe are the entry or unknown stacks,
+ * use the fall-back stack instead in this case.
+ */
+ sp = regs->sp;
+ stack = (unsigned long *)sp;
+
+ if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
+ info.type >= STACK_TYPE_EXCEPTION_LAST)
+ sp = __this_cpu_ist_top_va(VC2);
+
+sync:
+ /*
+ * Found a safe stack - switch to it as if the entry didn't happen via
+ * IST stack. The code below only copies pt_regs, the real switch happens
+ * in assembly code.
+ */
+ sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
+
+ regs_ret = (struct pt_regs *)sp;
+ *regs_ret = *regs;
+
+ return regs_ret;
+}
+#endif
+
struct bad_iret_stack {
void *error_entry_ret;
struct pt_regs regs;
@@ -745,9 +790,8 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
* Keep it simple: clear DR6 immediately.
*/
get_debugreg(dr6, 6);
- set_debugreg(0, 6);
- /* Filter out all the reserved bits which are preset to 1 */
- dr6 &= ~DR6_RESERVED;
+ set_debugreg(DR6_RESERVED, 6);
+ dr6 ^= DR6_RESERVED; /* Flip to positive polarity */
return dr6;
}
@@ -776,74 +820,20 @@ static __always_inline unsigned long debug_read_clear_dr6(void)
*
* May run on IST stack.
*/
-static void handle_debug(struct pt_regs *regs, unsigned long dr6, bool user)
-{
- struct task_struct *tsk = current;
- bool user_icebp;
- int si_code;
-
- /*
- * The SDM says "The processor clears the BTF flag when it
- * generates a debug exception." Clear TIF_BLOCKSTEP to keep
- * TIF_BLOCKSTEP in sync with the hardware BTF flag.
- */
- clear_thread_flag(TIF_BLOCKSTEP);
-
- /*
- * If DR6 is zero, no point in trying to handle it. The kernel is
- * not using INT1.
- */
- if (!user && !dr6)
- return;
+static bool notify_debug(struct pt_regs *regs, unsigned long *dr6)
+{
/*
- * If dr6 has no reason to give us about the origin of this trap,
- * then it's very likely the result of an icebp/int01 trap.
- * User wants a sigtrap for that.
+ * Notifiers will clear bits in @dr6 to indicate the event has been
+ * consumed - hw_breakpoint_handler(), single_stop_cont().
+ *
+ * Notifiers will set bits in @virtual_dr6 to indicate the desire
+ * for signals - ptrace_triggered(), kgdb_hw_overflow_handler().
*/
- user_icebp = user && !dr6;
-
- /* Store the virtualized DR6 value */
- tsk->thread.debugreg6 = dr6;
-
-#ifdef CONFIG_KPROBES
- if (kprobe_debug_handler(regs)) {
- return;
- }
-#endif
-
- if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, 0,
- SIGTRAP) == NOTIFY_STOP) {
- return;
- }
-
- /* It's safe to allow irq's after DR6 has been saved */
- cond_local_irq_enable(regs);
-
- if (v8086_mode(regs)) {
- handle_vm86_trap((struct kernel_vm86_regs *) regs, 0,
- X86_TRAP_DB);
- goto out;
- }
-
- if (WARN_ON_ONCE((dr6 & DR_STEP) && !user_mode(regs))) {
- /*
- * Historical junk that used to handle SYSENTER single-stepping.
- * This should be unreachable now. If we survive for a while
- * without anyone hitting this warning, we'll turn this into
- * an oops.
- */
- tsk->thread.debugreg6 &= ~DR_STEP;
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
- }
-
- si_code = get_si_code(tsk->thread.debugreg6);
- if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
- send_sigtrap(regs, 0, si_code);
+ if (notify_die(DIE_DEBUG, "debug", regs, (long)dr6, 0, SIGTRAP) == NOTIFY_STOP)
+ return true;
-out:
- cond_local_irq_disable(regs);
+ return false;
}
static __always_inline void exc_debug_kernel(struct pt_regs *regs,
@@ -870,6 +860,20 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
*/
WARN_ON_ONCE(user_mode(regs));
+ if (test_thread_flag(TIF_BLOCKSTEP)) {
+ /*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." but PTRACE_BLOCKSTEP requested
+ * it for userspace, but we just took a kernel #DB, so re-set
+ * BTF.
+ */
+ unsigned long debugctl;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ debugctl |= DEBUGCTLMSR_BTF;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ }
+
/*
* Catch SYSENTER with TF set and clear DR_STEP. If this hit a
* watchpoint at the same time then that will still be handled.
@@ -877,8 +881,32 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP;
- handle_debug(regs, dr6, false);
+ if (kprobe_debug_handler(regs))
+ goto out;
+
+ /*
+ * The kernel doesn't use INT1
+ */
+ if (!dr6)
+ goto out;
+ if (notify_debug(regs, &dr6))
+ goto out;
+
+ /*
+ * The kernel doesn't use TF single-step outside of:
+ *
+ * - Kprobes, consumed through kprobe_debug_handler()
+ * - KGDB, consumed through notify_debug()
+ *
+ * So if we get here with DR_STEP set, something is wonky.
+ *
+ * A known way to trigger this is through QEMU's GDB stub,
+ * which leaks #DB into the guest and causes IST recursion.
+ */
+ if (WARN_ON_ONCE(dr6 & DR_STEP))
+ regs->flags &= ~X86_EFLAGS_TF;
+out:
instrumentation_end();
idtentry_exit_nmi(regs, irq_state);
@@ -888,6 +916,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
static __always_inline void exc_debug_user(struct pt_regs *regs,
unsigned long dr6)
{
+ bool icebp;
+
/*
* If something gets miswired and we end up here for a kernel mode
* #DB, we will malfunction.
@@ -906,8 +936,48 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,
irqentry_enter_from_user_mode(regs);
instrumentation_begin();
- handle_debug(regs, dr6, true);
+ /*
+ * Start the virtual/ptrace DR6 value with just the DR_STEP mask
+ * of the real DR6. ptrace_triggered() will set the DR_TRAPn bits.
+ *
+ * Userspace expects DR_STEP to be visible in ptrace_get_debugreg(6)
+ * even if it is not the result of PTRACE_SINGLESTEP.
+ */
+ current->thread.virtual_dr6 = (dr6 & DR_STEP);
+
+ /*
+ * The SDM says "The processor clears the BTF flag when it
+ * generates a debug exception." Clear TIF_BLOCKSTEP to keep
+ * TIF_BLOCKSTEP in sync with the hardware BTF flag.
+ */
+ clear_thread_flag(TIF_BLOCKSTEP);
+ /*
+ * If dr6 has no reason to give us about the origin of this trap,
+ * then it's very likely the result of an icebp/int01 trap.
+ * User wants a sigtrap for that.
+ */
+ icebp = !dr6;
+
+ if (notify_debug(regs, &dr6))
+ goto out;
+
+ /* It's safe to allow irq's after DR6 has been saved */
+ local_irq_enable();
+
+ if (v8086_mode(regs)) {
+ handle_vm86_trap((struct kernel_vm86_regs *)regs, 0, X86_TRAP_DB);
+ goto out_irq;
+ }
+
+ /* Add the virtual_dr6 bits for signals. */
+ dr6 |= current->thread.virtual_dr6;
+ if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)
+ send_sigtrap(regs, 0, get_si_code(dr6));
+
+out_irq:
+ local_irq_disable();
+out:
instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
@@ -1074,6 +1144,9 @@ void __init trap_init(void)
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();
+ /* Init GHCB memory pages when running as an SEV-ES guest */
+ sev_es_init_vc_handling();
+
idt_setup_traps();
/*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 49d925043171..f70dffc2771f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -54,7 +54,7 @@ struct clocksource *art_related_clocksource;
struct cyc2ns {
struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */
- seqcount_t seq; /* 32 + 4 = 36 */
+ seqcount_latch_t seq; /* 32 + 4 = 36 */
}; /* fits one cacheline */
@@ -73,14 +73,14 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
preempt_disable_notrace();
do {
- seq = this_cpu_read(cyc2ns.seq.sequence);
+ seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
idx = seq & 1;
data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
- } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
+ } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}
__always_inline void cyc2ns_read_end(void)
@@ -186,7 +186,7 @@ static void __init cyc2ns_init_boot_cpu(void)
{
struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
- seqcount_init(&c2n->seq);
+ seqcount_latch_init(&c2n->seq);
__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
}
@@ -203,7 +203,7 @@ static void __init cyc2ns_init_secondary_cpus(void)
for_each_possible_cpu(cpu) {
if (cpu != this_cpu) {
- seqcount_init(&c2n->seq);
+ seqcount_latch_init(&c2n->seq);
c2n = per_cpu_ptr(&cyc2ns, cpu);
c2n->data[0] = data[0];
c2n->data[1] = data[1];
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 8d5cbe1bbb3b..f6225bf22c02 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -45,11 +45,12 @@
* value that, lies close to the top of the kernel memory. The limit for the GDT
* and the IDT are set to zero.
*
- * Given that SLDT and STR are not commonly used in programs that run on WineHQ
- * or DOSEMU2, they are not emulated.
- *
- * The instruction smsw is emulated to return the value that the register CR0
+ * The instruction SMSW is emulated to return the value that the register CR0
* has at boot time as set in the head_32.
+ * SLDT and STR are emulated to return the values that the kernel programmatically
+ * assigns:
+ * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not.
+ * - STR returns (GDT_ENTRY_TSS * 8).
*
* Emulation is provided for both 32-bit and 64-bit processes.
*
@@ -244,16 +245,34 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
*data_size += UMIP_GDT_IDT_LIMIT_SIZE;
memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
- } else if (umip_inst == UMIP_INST_SMSW) {
- unsigned long dummy_value = CR0_STATE;
+ } else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT ||
+ umip_inst == UMIP_INST_STR) {
+ unsigned long dummy_value;
+
+ if (umip_inst == UMIP_INST_SMSW) {
+ dummy_value = CR0_STATE;
+ } else if (umip_inst == UMIP_INST_STR) {
+ dummy_value = GDT_ENTRY_TSS * 8;
+ } else if (umip_inst == UMIP_INST_SLDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ down_read(&current->mm->context.ldt_usr_sem);
+ if (current->mm->context.ldt)
+ dummy_value = GDT_ENTRY_LDT * 8;
+ else
+ dummy_value = 0;
+ up_read(&current->mm->context.ldt_usr_sem);
+#else
+ dummy_value = 0;
+#endif
+ }
/*
- * Even though the CR0 register has 4 bytes, the number
+ * For these 3 instructions, the number
* of bytes to be copied in the result buffer is determined
* by whether the operand is a register or a memory location.
* If operand is a register, return as many bytes as the operand
* size. If operand is memory, return only the two least
- * siginificant bytes of CR0.
+ * siginificant bytes.
*/
if (X86_MODRM_MOD(insn->modrm.value) == 3)
*data_size = insn->opnd_bytes;
@@ -261,7 +280,6 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
*data_size = 2;
memcpy(data, &dummy_value, *data_size);
- /* STR and SLDT are not emulated */
} else {
return -EINVAL;
}
@@ -317,63 +335,28 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
*/
bool fixup_umip_exception(struct pt_regs *regs)
{
- int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst;
- unsigned long seg_base = 0, *reg_addr;
+ int nr_copied, reg_offset, dummy_data_size, umip_inst;
/* 10 bytes is the maximum size of the result of UMIP instructions */
unsigned char dummy_data[10] = { 0 };
unsigned char buf[MAX_INSN_SIZE];
+ unsigned long *reg_addr;
void __user *uaddr;
struct insn insn;
- int seg_defs;
if (!regs)
return false;
- /*
- * If not in user-space long mode, a custom code segment could be in
- * use. This is true in protected mode (if the process defined a local
- * descriptor table), or virtual-8086 mode. In most of the cases
- * seg_base will be zero as in USER_CS.
- */
- if (!user_64bit_mode(regs))
- seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
-
- if (seg_base == -1L)
- return false;
-
- not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
- sizeof(buf));
- nr_copied = sizeof(buf) - not_copied;
+ nr_copied = insn_fetch_from_user(regs, buf);
/*
- * The copy_from_user above could have failed if user code is protected
- * by a memory protection key. Give up on emulation in such a case.
- * Should we issue a page fault?
+ * The insn_fetch_from_user above could have failed if user code
+ * is protected by a memory protection key. Give up on emulation
+ * in such a case. Should we issue a page fault?
*/
if (!nr_copied)
return false;
- insn_init(&insn, buf, nr_copied, user_64bit_mode(regs));
-
- /*
- * Override the default operand and address sizes with what is specified
- * in the code segment descriptor. The instruction decoder only sets
- * the address size it to either 4 or 8 address bytes and does nothing
- * for the operand bytes. This OK for most of the cases, but we could
- * have special cases where, for instance, a 16-bit code segment
- * descriptor is used.
- * If there is an address override prefix, the instruction decoder
- * correctly updates these values, even for 16-bit defaults.
- */
- seg_defs = insn_get_code_seg_params(regs);
- if (seg_defs == -EINVAL)
- return false;
-
- insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
- insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
-
- insn_get_length(&insn);
- if (nr_copied < insn.length)
+ if (!insn_decode(&insn, regs, buf, nr_copied))
return false;
umip_inst = identify_insn(&insn);
@@ -383,10 +366,6 @@ bool fixup_umip_exception(struct pt_regs *regs)
umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
- /* Do not emulate (spoof) SLDT or STR. */
- if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
- return false;
-
umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index ec88bbe08a32..73f800100066 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <asm/ptrace.h>
@@ -127,12 +128,12 @@ static struct orc_entry null_orc_entry = {
.sp_offset = sizeof(long),
.sp_reg = ORC_REG_SP,
.bp_reg = ORC_REG_UNDEFINED,
- .type = ORC_TYPE_CALL
+ .type = UNWIND_HINT_TYPE_CALL
};
/* Fake frame pointer entry -- used as a fallback for generated code */
static struct orc_entry orc_fp_entry = {
- .type = ORC_TYPE_CALL,
+ .type = UNWIND_HINT_TYPE_CALL,
.sp_reg = ORC_REG_BP,
.sp_offset = 16,
.bp_reg = ORC_REG_PREV_SP,
@@ -320,19 +321,12 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address);
unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
{
- struct task_struct *task = state->task;
-
if (unwind_done(state))
return NULL;
if (state->regs)
return &state->regs->ip;
- if (task != current && state->sp == task->thread.sp) {
- struct inactive_task_frame *frame = (void *)task->thread.sp;
- return &frame->ret_addr;
- }
-
if (state->sp)
return (unsigned long *)state->sp - 1;
@@ -531,7 +525,7 @@ bool unwind_next_frame(struct unwind_state *state)
/* Find IP, SP and possibly regs: */
switch (orc->type) {
- case ORC_TYPE_CALL:
+ case UNWIND_HINT_TYPE_CALL:
ip_p = sp - sizeof(long);
if (!deref_stack_reg(state, ip_p, &state->ip))
@@ -546,7 +540,7 @@ bool unwind_next_frame(struct unwind_state *state)
state->signal = false;
break;
- case ORC_TYPE_REGS:
+ case UNWIND_HINT_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn_current("can't access registers at %pB\n",
(void *)orig_ip);
@@ -559,7 +553,7 @@ bool unwind_next_frame(struct unwind_state *state)
state->signal = true;
break;
- case ORC_TYPE_REGS_IRET:
+ case UNWIND_HINT_TYPE_REGS_PARTIAL:
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn_current("can't access iret registers at %pB\n",
(void *)orig_ip);
@@ -662,7 +656,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
} else {
struct inactive_task_frame *frame = (void *)task->thread.sp;
- state->sp = task->thread.sp;
+ state->sp = task->thread.sp + sizeof(*frame);
state->bp = READ_ONCE_NOCHECK(frame->bp);
state->ip = READ_ONCE_NOCHECK(frame->ret_addr);
state->signal = (void *)state->ip == ret_from_fork;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9a03e5b23135..bf9e0adb5b7e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -136,6 +136,7 @@ SECTIONS
ENTRY_TEXT
ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
+ STATIC_CALL_TEXT
*(.fixup)
*(.gnu.warning)
@@ -411,10 +412,47 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
+ ELF_DETAILS
DISCARDS
-}
+ /*
+ * Make sure that the .got.plt is either completely empty or it
+ * contains only the lazy dispatch entries.
+ */
+ .got.plt (INFO) : { *(.got.plt) }
+ ASSERT(SIZEOF(.got.plt) == 0 ||
+#ifdef CONFIG_X86_64
+ SIZEOF(.got.plt) == 0x18,
+#else
+ SIZEOF(.got.plt) == 0xc,
+#endif
+ "Unexpected GOT/PLT entries detected!")
+
+ /*
+ * Sections that should stay zero sized, which is safer to
+ * explicitly check instead of blindly discarding.
+ */
+ .got : {
+ *(.got) *(.igot.*)
+ }
+ ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!")
+
+ .plt : {
+ *(.plt) *(.plt.*) *(.iplt)
+ }
+ ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
+
+ .rel.dyn : {
+ *(.rel.*) *(.rel_*)
+ }
+ ASSERT(SIZEOF(.rel.dyn) == 0, "Unexpected run-time relocations (.rel) detected!")
+
+ .rela.dyn : {
+ *(.rela.*) *(.rela_*)
+ }
+ ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
+}
#ifdef CONFIG_X86_32
/*
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 123f1c1f1788..a3038d8deb6a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -24,6 +24,7 @@
#include <asm/tsc.h>
#include <asm/iommu.h>
#include <asm/mach_traps.h>
+#include <asm/irqdomain.h>
void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
@@ -67,11 +68,7 @@ struct x86_init_ops x86_init __initdata = {
},
.mpparse = {
- .mpc_record = x86_init_uint_noop,
.setup_ioapic_ids = x86_init_noop,
- .mpc_apic_id = default_mpc_apic_id,
- .smp_read_mpc_oem = default_smp_read_mpc_oem,
- .mpc_oem_bus_info = default_mpc_oem_bus_info,
.find_smp_config = default_find_smp_config,
.get_smp_config = default_get_smp_config,
},
@@ -80,7 +77,8 @@ struct x86_init_ops x86_init __initdata = {
.pre_vector_init = init_ISA_irqs,
.intr_init = native_init_IRQ,
.intr_mode_select = apic_intr_mode_select,
- .intr_mode_init = apic_intr_mode_init
+ .intr_mode_init = apic_intr_mode_init,
+ .create_pci_msi_domain = native_create_pci_msi_domain,
},
.oem = {
@@ -148,28 +146,10 @@ EXPORT_SYMBOL_GPL(x86_platform);
#if defined(CONFIG_PCI_MSI)
struct x86_msi_ops x86_msi __ro_after_init = {
- .setup_msi_irqs = native_setup_msi_irqs,
- .teardown_msi_irq = native_teardown_msi_irq,
- .teardown_msi_irqs = default_teardown_msi_irqs,
.restore_msi_irqs = default_restore_msi_irqs,
};
/* MSI arch specific hooks */
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
- return x86_msi.setup_msi_irqs(dev, nvec, type);
-}
-
-void arch_teardown_msi_irqs(struct pci_dev *dev)
-{
- x86_msi.teardown_msi_irqs(dev);
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
- x86_msi.teardown_msi_irq(irq);
-}
-
void arch_restore_msi_irqs(struct pci_dev *dev)
{
x86_msi.restore_msi_irqs(dev);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index fbd5bd7a945a..f92dfd8ef10d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -66,6 +66,7 @@ config KVM_WERROR
default y if X86_64 && !KASAN
# We use the dependency on !COMPILE_TEST to not be enabled
# blindly in allmodconfig or allyesconfig configurations
+ depends on KVM
depends on (X86_64 && !KASAN) || !COMPILE_TEST
depends on EXPERT
help
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4a3081e9f4b5..b804444e16d4 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -15,9 +15,11 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
- hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
+ hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
+ mmu/spte.o mmu/tdp_iter.o mmu/tdp_mmu.o
-kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
+kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
+ vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3fd6eec202d7..06a278b3701d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -54,7 +54,24 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
#define F feature_bit
-static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
+static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
+ struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
+{
+ struct kvm_cpuid_entry2 *e;
+ int i;
+
+ for (i = 0; i < nent; i++) {
+ e = &entries[i];
+
+ if (e->function == function && (e->index == index ||
+ !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)))
+ return e;
+ }
+
+ return NULL;
+}
+
+static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
{
struct kvm_cpuid_entry2 *best;
@@ -62,7 +79,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
- best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ best = cpuid_entry2_find(entries, nent, 0x80000008, 0);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;
@@ -107,6 +124,13 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ /*
+ * save the feature bitmap to avoid cpuid lookup for every PV
+ * operation
+ */
+ if (best)
+ vcpu->arch.pv_cpuid.features = best->eax;
+
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (best)
@@ -121,8 +145,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
- kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
-
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
@@ -146,7 +168,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
vcpu->arch.cr4_guest_rsvd_bits =
__cr4_reserved_bits(guest_cpuid_has, vcpu);
- kvm_x86_ops.update_exception_bitmap(vcpu);
+
+ /* Invoke the vendor callback only after the above state is updated. */
+ kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
}
static int is_efer_nx(void)
@@ -186,7 +210,6 @@ int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
not_found:
return 36;
}
-EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
@@ -194,46 +217,53 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry __user *entries)
{
int r, i;
- struct kvm_cpuid_entry *cpuid_entries = NULL;
+ struct kvm_cpuid_entry *e = NULL;
+ struct kvm_cpuid_entry2 *e2 = NULL;
- r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
- goto out;
+ return -E2BIG;
+
if (cpuid->nent) {
- cpuid_entries = vmemdup_user(entries,
- array_size(sizeof(struct kvm_cpuid_entry),
- cpuid->nent));
- if (IS_ERR(cpuid_entries)) {
- r = PTR_ERR(cpuid_entries);
- goto out;
+ e = vmemdup_user(entries, array_size(sizeof(*e), cpuid->nent));
+ if (IS_ERR(e))
+ return PTR_ERR(e);
+
+ e2 = kvmalloc_array(cpuid->nent, sizeof(*e2), GFP_KERNEL_ACCOUNT);
+ if (!e2) {
+ r = -ENOMEM;
+ goto out_free_cpuid;
}
}
for (i = 0; i < cpuid->nent; i++) {
- vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
- vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
- vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
- vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
- vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
- vcpu->arch.cpuid_entries[i].index = 0;
- vcpu->arch.cpuid_entries[i].flags = 0;
- vcpu->arch.cpuid_entries[i].padding[0] = 0;
- vcpu->arch.cpuid_entries[i].padding[1] = 0;
- vcpu->arch.cpuid_entries[i].padding[2] = 0;
+ e2[i].function = e[i].function;
+ e2[i].eax = e[i].eax;
+ e2[i].ebx = e[i].ebx;
+ e2[i].ecx = e[i].ecx;
+ e2[i].edx = e[i].edx;
+ e2[i].index = 0;
+ e2[i].flags = 0;
+ e2[i].padding[0] = 0;
+ e2[i].padding[1] = 0;
+ e2[i].padding[2] = 0;
}
- vcpu->arch.cpuid_nent = cpuid->nent;
- r = kvm_check_cpuid(vcpu);
+
+ r = kvm_check_cpuid(e2, cpuid->nent);
if (r) {
- vcpu->arch.cpuid_nent = 0;
- kvfree(cpuid_entries);
- goto out;
+ kvfree(e2);
+ goto out_free_cpuid;
}
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = cpuid->nent;
+
cpuid_fix_nx_cap(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
- kvfree(cpuid_entries);
-out:
+out_free_cpuid:
+ kvfree(e);
+
return r;
}
@@ -241,26 +271,32 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
+ struct kvm_cpuid_entry2 *e2 = NULL;
int r;
- r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
- goto out;
- r = -EFAULT;
- if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
- cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
- goto out;
- vcpu->arch.cpuid_nent = cpuid->nent;
- r = kvm_check_cpuid(vcpu);
+ return -E2BIG;
+
+ if (cpuid->nent) {
+ e2 = vmemdup_user(entries, array_size(sizeof(*e2), cpuid->nent));
+ if (IS_ERR(e2))
+ return PTR_ERR(e2);
+ }
+
+ r = kvm_check_cpuid(e2, cpuid->nent);
if (r) {
- vcpu->arch.cpuid_nent = 0;
- goto out;
+ kvfree(e2);
+ return r;
}
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = cpuid->nent;
+
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
-out:
- return r;
+
+ return 0;
}
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@@ -371,7 +407,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
- F(SERIALIZE)
+ F(SERIALIZE) | F(TSXLDTRK)
);
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
@@ -941,17 +977,8 @@ out_free:
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *e;
- int i;
-
- for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- e = &vcpu->arch.cpuid_entries[i];
-
- if (e->function == function && (e->index == index ||
- !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)))
- return e;
- }
- return NULL;
+ return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
+ function, index);
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 3a923ae15f2f..bf8577947ed2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -5,6 +5,7 @@
#include "x86.h"
#include <asm/cpu.h>
#include <asm/processor.h>
+#include <uapi/asm/kvm_para.h>
extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
@@ -34,6 +35,11 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
return vcpu->arch.maxphyaddr;
}
+static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+ return (gpa >= BIT_ULL(cpuid_maxphyaddr(vcpu)));
+}
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -308,4 +314,13 @@ static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
}
+static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
+ unsigned int kvm_feature)
+{
+ if (!vcpu->arch.pv_cpuid.enforce)
+ return true;
+
+ return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
+}
+
#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2f6510de6b0c..0d917eb70319 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3606,7 +3606,7 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt)
u64 tsc_aux = 0;
if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
- return emulate_gp(ctxt, 0);
+ return emulate_ud(ctxt);
ctxt->dst.val = tsc_aux;
return X86EMUL_CONTINUE;
}
@@ -3701,21 +3701,35 @@ static int em_dr_write(struct x86_emulate_ctxt *ctxt)
static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
{
+ u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
u64 msr_data;
+ int r;
msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
- if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
+ r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);
+
+ if (r == X86EMUL_IO_NEEDED)
+ return r;
+
+ if (r > 0)
return emulate_gp(ctxt, 0);
- return X86EMUL_CONTINUE;
+ return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
}
static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
{
+ u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
u64 msr_data;
+ int r;
+
+ r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data);
+
+ if (r == X86EMUL_IO_NEEDED)
+ return r;
- if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
+ if (r)
return emulate_gp(ctxt, 0);
*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1d330564eed8..5c7c4060b45c 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -633,6 +633,11 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
{
union hv_stimer_config new_config = {.as_uint64 = config},
old_config = {.as_uint64 = stimer->config.as_uint64};
+ struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
+ struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+
+ if (!synic->active && !host)
+ return 1;
trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index, config, host);
@@ -652,6 +657,12 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
bool host)
{
+ struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
+ struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+
+ if (!synic->active && !host)
+ return 1;
+
trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
stimer->index, count, host);
@@ -2000,20 +2011,20 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
break;
case HYPERV_CPUID_FEATURES:
- ent->eax |= HV_X64_MSR_VP_RUNTIME_AVAILABLE;
+ ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
- ent->eax |= HV_X64_MSR_SYNIC_AVAILABLE;
+ ent->eax |= HV_MSR_SYNIC_AVAILABLE;
ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
- ent->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
- ent->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
- ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE;
- ent->eax |= HV_X64_MSR_RESET_AVAILABLE;
+ ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+ ent->eax |= HV_MSR_HYPERCALL_AVAILABLE;
+ ent->eax |= HV_MSR_VP_INDEX_AVAILABLE;
+ ent->eax |= HV_MSR_RESET_AVAILABLE;
ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
- ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS;
- ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT;
+ ent->eax |= HV_ACCESS_FREQUENCY_MSRS;
+ ent->eax |= HV_ACCESS_REENLIGHTENMENT;
- ent->ebx |= HV_X64_POST_MESSAGES;
- ent->ebx |= HV_X64_SIGNAL_EVENTS;
+ ent->ebx |= HV_POST_MESSAGES;
+ ent->ebx |= HV_SIGNAL_EVENTS;
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index d057376bd3d3..698969e18fe3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -197,12 +197,9 @@ static void ioapic_lazy_update_eoi(struct kvm_ioapic *ioapic, int irq)
/*
* If no longer has pending EOI in LAPICs, update
- * EOI for this vetor.
+ * EOI for this vector.
*/
rtc_irq_eoi(ioapic, vcpu, entry->fields.vector);
- kvm_ioapic_update_eoi_one(vcpu, ioapic,
- entry->fields.trig_mode,
- irq);
break;
}
}
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index cfe83d4ae625..a889563ad02d 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -7,7 +7,7 @@
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 35cca2e0c802..105e7859d1f2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -310,6 +310,12 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
+static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
+{
+ kvm_lapic_set_reg(apic, APIC_DFR, val);
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
+}
+
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
{
return ((id >> 4) << 16) | (1 << (id & 0xf));
@@ -488,6 +494,12 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
}
}
+void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
+{
+ apic_clear_irr(vec, vcpu->arch.apic);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
+
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
struct kvm_vcpu *vcpu;
@@ -1576,9 +1588,6 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
- if (apic->lapic_timer.expired_tscdeadline == 0)
- return;
-
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
@@ -1593,7 +1602,10 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
- if (lapic_timer_int_injected(vcpu))
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns &&
+ lapic_timer_int_injected(vcpu))
__kvm_wait_lapic_expire(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
@@ -1629,14 +1641,15 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
}
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
- if (apic->lapic_timer.timer_advance_ns)
- __kvm_wait_lapic_expire(vcpu);
+ kvm_wait_lapic_expire(vcpu);
kvm_apic_inject_pending_timer_irqs(apic);
return;
}
atomic_inc(&apic->lapic_timer.pending);
- kvm_set_pending_timer(vcpu);
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ if (from_timer_fn)
+ kvm_vcpu_kick(vcpu);
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
@@ -1984,10 +1997,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
case APIC_DFR:
- if (!apic_x2apic_mode(apic)) {
- kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
- atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
- } else
+ if (!apic_x2apic_mode(apic))
+ kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
+ else
ret = 1;
break;
@@ -2183,8 +2195,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) ||
- !apic_lvtt_tscdeadline(apic))
+ if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
return 0;
return apic->lapic_timer.tscdeadline;
@@ -2194,8 +2205,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!kvm_apic_present(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
+ if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
return;
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2303,7 +2313,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
- kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
+ kvm_apic_set_dfr(apic, 0xffffffffU);
apic_set_spiv(apic, 0xff);
kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
if (!apic_x2apic_mode(apic))
@@ -2461,6 +2471,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
__apic_update_ppr(apic, &ppr);
return apic_has_interrupt_for_ppr(apic, ppr);
}
+EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 754f29beb83e..4fb86e3a9dd3 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -89,6 +89,7 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int shorthand, unsigned int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec);
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr);
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 5efc6081ca13..9c4a9c8e43d9 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -155,11 +155,6 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}
-static inline bool kvm_mmu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
-{
- return (gpa >= BIT_ULL(cpuid_maxphyaddr(vcpu)));
-}
-
/*
* Check if a given access (described through the I/D, W/R and U/S bits of a
* page fault error code pfec) causes a permission fault with the given PTE
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 71aa3da2a0b7..1f96adff8dc4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -19,10 +19,12 @@
#include "ioapic.h"
#include "mmu.h"
#include "mmu_internal.h"
+#include "tdp_mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "cpuid.h"
+#include "spte.h"
#include <linux/kvm_host.h>
#include <linux/types.h>
@@ -45,7 +47,6 @@
#include <asm/page.h>
#include <asm/memtype.h>
#include <asm/cmpxchg.h>
-#include <asm/e820/api.h>
#include <asm/io.h>
#include <asm/vmx.h>
#include <asm/kvm_page_track.h>
@@ -64,12 +65,12 @@ static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
-static struct kernel_param_ops nx_huge_pages_ops = {
+static const struct kernel_param_ops nx_huge_pages_ops = {
.set = set_nx_huge_pages,
.get = param_get_bool,
};
-static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
+static const struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
.set = set_nx_huge_pages_recovery_ratio,
.get = param_get_uint,
};
@@ -104,45 +105,13 @@ enum {
AUDIT_POST_SYNC
};
-#undef MMU_DEBUG
-
#ifdef MMU_DEBUG
-static bool dbg = 0;
+bool dbg = 0;
module_param(dbg, bool, 0644);
-
-#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
-#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
-#define MMU_WARN_ON(x) WARN_ON(x)
-#else
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
-#define MMU_WARN_ON(x) do { } while (0)
#endif
#define PTE_PREFETCH_NUM 8
-#define PT_FIRST_AVAIL_BITS_SHIFT 10
-#define PT64_SECOND_AVAIL_BITS_SHIFT 54
-
-/*
- * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
- * Access Tracking SPTEs.
- */
-#define SPTE_SPECIAL_MASK (3ULL << 52)
-#define SPTE_AD_ENABLED_MASK (0ULL << 52)
-#define SPTE_AD_DISABLED_MASK (1ULL << 52)
-#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52)
-#define SPTE_MMIO_MASK (3ULL << 52)
-
-#define PT64_LEVEL_BITS 9
-
-#define PT64_LEVEL_SHIFT(level) \
- (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)
-
-#define PT64_INDEX(address, level)\
- (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))
-
-
#define PT32_LEVEL_BITS 10
#define PT32_LEVEL_SHIFT(level) \
@@ -156,18 +125,6 @@ module_param(dbg, bool, 0644);
(((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
-#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
-#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
-#else
-#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
-#endif
-#define PT64_LVL_ADDR_MASK(level) \
- (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
- * PT64_LEVEL_BITS))) - 1))
-#define PT64_LVL_OFFSET_MASK(level) \
- (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
- * PT64_LEVEL_BITS))) - 1))
-
#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))
@@ -175,42 +132,11 @@ module_param(dbg, bool, 0644);
(PAGE_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
* PT32_LEVEL_BITS))) - 1))
-#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
- | shadow_x_mask | shadow_nx_mask | shadow_me_mask)
-
-#define ACC_EXEC_MASK 1
-#define ACC_WRITE_MASK PT_WRITABLE_MASK
-#define ACC_USER_MASK PT_USER_MASK
-#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
-
-/* The mask for the R/X bits in EPT PTEs */
-#define PT64_EPT_READABLE_MASK 0x1ull
-#define PT64_EPT_EXECUTABLE_MASK 0x4ull
-
#include <trace/events/kvm.h>
-#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
-#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
-
-#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-
/* make pte_list_desc fit well in cache line */
#define PTE_LIST_EXT 3
-/*
- * Return values of handle_mmio_page_fault and mmu.page_fault:
- * RET_PF_RETRY: let CPU fault again on the address.
- * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
- *
- * For handle_mmio_page_fault only:
- * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
- */
-enum {
- RET_PF_RETRY = 0,
- RET_PF_EMULATE = 1,
- RET_PF_INVALID = 2,
-};
-
struct pte_list_desc {
u64 *sptes[PTE_LIST_EXT];
struct pte_list_desc *more;
@@ -242,65 +168,10 @@ struct kvm_shadow_walk_iterator {
__shadow_walk_next(&(_walker), spte))
static struct kmem_cache *pte_list_desc_cache;
-static struct kmem_cache *mmu_page_header_cache;
+struct kmem_cache *mmu_page_header_cache;
static struct percpu_counter kvm_total_used_mmu_pages;
-static u64 __read_mostly shadow_nx_mask;
-static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
-static u64 __read_mostly shadow_user_mask;
-static u64 __read_mostly shadow_accessed_mask;
-static u64 __read_mostly shadow_dirty_mask;
-static u64 __read_mostly shadow_mmio_value;
-static u64 __read_mostly shadow_mmio_access_mask;
-static u64 __read_mostly shadow_present_mask;
-static u64 __read_mostly shadow_me_mask;
-
-/*
- * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
- * shadow_acc_track_mask is the set of bits to be cleared in non-accessed
- * pages.
- */
-static u64 __read_mostly shadow_acc_track_mask;
-
-/*
- * The mask/shift to use for saving the original R/X bits when marking the PTE
- * as not-present for access tracking purposes. We do not save the W bit as the
- * PTEs being access tracked also need to be dirty tracked, so the W bit will be
- * restored only when a write is attempted to the page.
- */
-static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
- PT64_EPT_EXECUTABLE_MASK;
-static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
-
-/*
- * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order
- * to guard against L1TF attacks.
- */
-static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
-
-/*
- * The number of high-order 1 bits to use in the mask above.
- */
-static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
-
-/*
- * In some cases, we need to preserve the GFN of a non-present or reserved
- * SPTE when we usurp the upper five bits of the physical address space to
- * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll
- * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask
- * left into the reserved bits, i.e. the GFN in the SPTE will be split into
- * high and low parts. This mask covers the lower bits of the GFN.
- */
-static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
-
-/*
- * The number of non-reserved physical address bits irrespective of features
- * that repurpose legal bits, e.g. MKTME.
- */
-static u8 __read_mostly shadow_phys_bits;
-
static void mmu_spte_set(u64 *sptep, u64 spte);
-static bool is_executable_pte(u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
@@ -325,7 +196,7 @@ static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm);
}
-static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
+void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
u64 start_gfn, u64 pages)
{
struct kvm_tlb_range range;
@@ -336,143 +207,17 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
kvm_flush_remote_tlbs_with_range(kvm, &range);
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask)
-{
- BUG_ON((u64)(unsigned)access_mask != access_mask);
- WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len));
- WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
- shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
- shadow_mmio_access_mask = access_mask;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
-
-static bool is_mmio_spte(u64 spte)
-{
- return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK;
-}
-
-static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
-{
- return sp->role.ad_disabled;
-}
-
-static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
-{
- /*
- * When using the EPT page-modification log, the GPAs in the log
- * would come from L2 rather than L1. Therefore, we need to rely
- * on write protection to record dirty pages. This also bypasses
- * PML, since writes now result in a vmexit.
- */
- return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
-}
-
-static inline bool spte_ad_enabled(u64 spte)
-{
- MMU_WARN_ON(is_mmio_spte(spte));
- return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK;
-}
-
-static inline bool spte_ad_need_write_protect(u64 spte)
-{
- MMU_WARN_ON(is_mmio_spte(spte));
- return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
-}
-
-static bool is_nx_huge_page_enabled(void)
+bool is_nx_huge_page_enabled(void)
{
return READ_ONCE(nx_huge_pages);
}
-static inline u64 spte_shadow_accessed_mask(u64 spte)
-{
- MMU_WARN_ON(is_mmio_spte(spte));
- return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
-}
-
-static inline u64 spte_shadow_dirty_mask(u64 spte)
-{
- MMU_WARN_ON(is_mmio_spte(spte));
- return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
-}
-
-static inline bool is_access_track_spte(u64 spte)
-{
- return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
-}
-
-/*
- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
- * the memslots generation and is derived as follows:
- *
- * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
- *
- * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
- * the MMIO generation number, as doing so would require stealing a bit from
- * the "real" generation number and thus effectively halve the maximum number
- * of MMIO generations that can be handled before encountering a wrap (which
- * requires a full MMU zap). The flag is instead explicitly queried when
- * checking for MMIO spte cache hits.
- */
-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0)
-
-#define MMIO_SPTE_GEN_LOW_START 3
-#define MMIO_SPTE_GEN_LOW_END 11
-#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
- MMIO_SPTE_GEN_LOW_START)
-
-#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
-#define MMIO_SPTE_GEN_HIGH_END 62
-#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
- MMIO_SPTE_GEN_HIGH_START)
-
-static u64 generation_mmio_spte_mask(u64 gen)
-{
- u64 mask;
-
- WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
- BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
-
- mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
- mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
- return mask;
-}
-
-static u64 get_mmio_spte_generation(u64 spte)
-{
- u64 gen;
-
- gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
- gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
- return gen;
-}
-
-static u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
-{
-
- u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK;
- u64 mask = generation_mmio_spte_mask(gen);
- u64 gpa = gfn << PAGE_SHIFT;
-
- access &= shadow_mmio_access_mask;
- mask |= shadow_mmio_value | access;
- mask |= gpa | shadow_nonpresent_or_rsvd_mask;
- mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
- << shadow_nonpresent_or_rsvd_mask_len;
-
- return mask;
-}
-
static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
unsigned int access)
{
u64 mask = make_mmio_spte(vcpu, gfn, access);
- unsigned int gen = get_mmio_spte_generation(mask);
-
- access = mask & ACC_ALL;
- trace_mark_mmio_spte(sptep, gfn, access, gen);
+ trace_mark_mmio_spte(sptep, gfn, mask);
mmu_spte_set(sptep, mask);
}
@@ -480,7 +225,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
{
u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;
- gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
+ gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)
& shadow_nonpresent_or_rsvd_mask;
return gpa >> PAGE_SHIFT;
@@ -521,7 +266,7 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception)
{
/* Check if guest physical address doesn't exceed guest maximum */
- if (kvm_mmu_is_illegal_gpa(vcpu, gpa)) {
+ if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) {
exception->error_code |= PFERR_RSVD_MASK;
return UNMAPPED_GVA;
}
@@ -529,90 +274,6 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
return gpa;
}
-/*
- * Sets the shadow PTE masks used by the MMU.
- *
- * Assumptions:
- * - Setting either @accessed_mask or @dirty_mask requires setting both
- * - At least one of @accessed_mask or @acc_track_mask must be set
- */
-void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
- u64 acc_track_mask, u64 me_mask)
-{
- BUG_ON(!dirty_mask != !accessed_mask);
- BUG_ON(!accessed_mask && !acc_track_mask);
- BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK);
-
- shadow_user_mask = user_mask;
- shadow_accessed_mask = accessed_mask;
- shadow_dirty_mask = dirty_mask;
- shadow_nx_mask = nx_mask;
- shadow_x_mask = x_mask;
- shadow_present_mask = p_mask;
- shadow_acc_track_mask = acc_track_mask;
- shadow_me_mask = me_mask;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
-
-static u8 kvm_get_shadow_phys_bits(void)
-{
- /*
- * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
- * in CPU detection code, but the processor treats those reduced bits as
- * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
- * the physical address bits reported by CPUID.
- */
- if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
- return cpuid_eax(0x80000008) & 0xff;
-
- /*
- * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
- * custom CPUID. Proceed with whatever the kernel found since these features
- * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
- */
- return boot_cpu_data.x86_phys_bits;
-}
-
-static void kvm_mmu_reset_all_pte_masks(void)
-{
- u8 low_phys_bits;
-
- shadow_user_mask = 0;
- shadow_accessed_mask = 0;
- shadow_dirty_mask = 0;
- shadow_nx_mask = 0;
- shadow_x_mask = 0;
- shadow_present_mask = 0;
- shadow_acc_track_mask = 0;
-
- shadow_phys_bits = kvm_get_shadow_phys_bits();
-
- /*
- * If the CPU has 46 or less physical address bits, then set an
- * appropriate mask to guard against L1TF attacks. Otherwise, it is
- * assumed that the CPU is not vulnerable to L1TF.
- *
- * Some Intel CPUs address the L1 cache using more PA bits than are
- * reported by CPUID. Use the PA width of the L1 cache when possible
- * to achieve more effective mitigation, e.g. if system RAM overlaps
- * the most significant bits of legal physical address space.
- */
- shadow_nonpresent_or_rsvd_mask = 0;
- low_phys_bits = boot_cpu_data.x86_phys_bits;
- if (boot_cpu_has_bug(X86_BUG_L1TF) &&
- !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
- 52 - shadow_nonpresent_or_rsvd_mask_len)) {
- low_phys_bits = boot_cpu_data.x86_cache_bits
- - shadow_nonpresent_or_rsvd_mask_len;
- shadow_nonpresent_or_rsvd_mask =
- rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);
- }
-
- shadow_nonpresent_or_rsvd_lower_gfn_mask =
- GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
-}
-
static int is_cpuid_PSE36(void)
{
return 1;
@@ -623,35 +284,6 @@ static int is_nx(struct kvm_vcpu *vcpu)
return vcpu->arch.efer & EFER_NX;
}
-static int is_shadow_present_pte(u64 pte)
-{
- return (pte != 0) && !is_mmio_spte(pte);
-}
-
-static int is_large_pte(u64 pte)
-{
- return pte & PT_PAGE_SIZE_MASK;
-}
-
-static int is_last_spte(u64 pte, int level)
-{
- if (level == PG_LEVEL_4K)
- return 1;
- if (is_large_pte(pte))
- return 1;
- return 0;
-}
-
-static bool is_executable_pte(u64 spte)
-{
- return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
-}
-
-static kvm_pfn_t spte_to_pfn(u64 pte)
-{
- return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-}
-
static gfn_t pse36_gfn_delta(u32 gpte)
{
int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
@@ -796,12 +428,6 @@ retry:
}
#endif
-static bool spte_can_locklessly_be_made_writable(u64 spte)
-{
- return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
- (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
-}
-
static bool spte_has_volatile_bits(u64 spte)
{
if (!is_shadow_present_pte(spte))
@@ -826,21 +452,6 @@ static bool spte_has_volatile_bits(u64 spte)
return false;
}
-static bool is_accessed_spte(u64 spte)
-{
- u64 accessed_mask = spte_shadow_accessed_mask(spte);
-
- return accessed_mask ? spte & accessed_mask
- : !is_access_track_spte(spte);
-}
-
-static bool is_dirty_spte(u64 spte)
-{
- u64 dirty_mask = spte_shadow_dirty_mask(spte);
-
- return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
-}
-
/* Rules for using mmu_spte_set:
* Set the sptep from nonpresent to present.
* Note: the sptep being assigned *must* be either not present
@@ -976,47 +587,19 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
return __get_spte_lockless(sptep);
}
-static u64 mark_spte_for_access_track(u64 spte)
-{
- if (spte_ad_enabled(spte))
- return spte & ~shadow_accessed_mask;
-
- if (is_access_track_spte(spte))
- return spte;
-
- /*
- * Making an Access Tracking PTE will result in removal of write access
- * from the PTE. So, verify that we will be able to restore the write
- * access in the fast page fault path later on.
- */
- WARN_ONCE((spte & PT_WRITABLE_MASK) &&
- !spte_can_locklessly_be_made_writable(spte),
- "kvm: Writable SPTE is not locklessly dirty-trackable\n");
-
- WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask <<
- shadow_acc_track_saved_bits_shift),
- "kvm: Access Tracking saved bit locations are not zero\n");
-
- spte |= (spte & shadow_acc_track_saved_bits_mask) <<
- shadow_acc_track_saved_bits_shift;
- spte &= ~shadow_acc_track_mask;
-
- return spte;
-}
-
/* Restore an acc-track PTE back to a regular PTE */
static u64 restore_acc_track_spte(u64 spte)
{
u64 new_spte = spte;
- u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift)
- & shadow_acc_track_saved_bits_mask;
+ u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT)
+ & SHADOW_ACC_TRACK_SAVED_BITS_MASK;
WARN_ON_ONCE(spte_ad_enabled(spte));
WARN_ON_ONCE(!is_access_track_spte(spte));
new_spte &= ~shadow_acc_track_mask;
- new_spte &= ~(shadow_acc_track_saved_bits_mask <<
- shadow_acc_track_saved_bits_shift);
+ new_spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK <<
+ SHADOW_ACC_TRACK_SAVED_BITS_SHIFT);
new_spte |= saved_bits;
return new_spte;
@@ -1193,7 +776,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
-static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
if (sp->lpage_disallowed)
return;
@@ -1221,7 +804,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
-static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
--kvm->stat.nx_lpage_splits;
sp->lpage_disallowed = false;
@@ -1640,6 +1223,9 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
{
struct kvm_rmap_head *rmap_head;
+ if (kvm->arch.tdp_mmu_enabled)
+ kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
+ slot->base_gfn + gfn_offset, mask, true);
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PG_LEVEL_4K, slot);
@@ -1666,6 +1252,9 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
{
struct kvm_rmap_head *rmap_head;
+ if (kvm->arch.tdp_mmu_enabled)
+ kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
+ slot->base_gfn + gfn_offset, mask, false);
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PG_LEVEL_4K, slot);
@@ -1710,6 +1299,10 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
write_protected |= __rmap_write_protect(kvm, rmap_head, true);
}
+ if (kvm->arch.tdp_mmu_enabled)
+ write_protected |=
+ kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn);
+
return write_protected;
}
@@ -1769,13 +1362,8 @@ restart:
pte_list_remove(rmap_head, sptep);
goto restart;
} else {
- new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
- new_spte |= (u64)new_pfn << PAGE_SHIFT;
-
- new_spte &= ~PT_WRITABLE_MASK;
- new_spte &= ~SPTE_HOST_WRITEABLE;
-
- new_spte = mark_spte_for_access_track(new_spte);
+ new_spte = kvm_mmu_changed_pte_notifier_make_spte(
+ *sptep, new_pfn);
mmu_spte_clear_track_bits(sptep);
mmu_spte_set(sptep, new_spte);
@@ -1919,12 +1507,26 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
unsigned flags)
{
- return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+ int r;
+
+ r = kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+
+ if (kvm->arch.tdp_mmu_enabled)
+ r |= kvm_tdp_mmu_zap_hva_range(kvm, start, end);
+
+ return r;
}
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
- return kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
+ int r;
+
+ r = kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
+
+ if (kvm->arch.tdp_mmu_enabled)
+ r |= kvm_tdp_mmu_set_spte_hva(kvm, hva, &pte);
+
+ return r;
}
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
@@ -1973,12 +1575,24 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
+ int young = false;
+
+ young = kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
+ if (kvm->arch.tdp_mmu_enabled)
+ young |= kvm_tdp_mmu_age_hva_range(kvm, start, end);
+
+ return young;
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
+ int young = false;
+
+ young = kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp);
+ if (kvm->arch.tdp_mmu_enabled)
+ young |= kvm_tdp_mmu_test_age_hva(kvm, hva);
+
+ return young;
}
#ifdef MMU_DEBUG
@@ -2577,13 +2191,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
- spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
- shadow_user_mask | shadow_x_mask | shadow_me_mask;
-
- if (sp_ad_disabled(sp))
- spte |= SPTE_AD_DISABLED_MASK;
- else
- spte |= shadow_accessed_mask;
+ spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));
mmu_spte_set(sptep, spte);
@@ -2615,8 +2223,9 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
}
-static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
- u64 *spte)
+/* Returns the number of zapped non-leaf child shadow pages. */
+static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
+ u64 *spte, struct list_head *invalid_list)
{
u64 pte;
struct kvm_mmu_page *child;
@@ -2630,23 +2239,34 @@ static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
} else {
child = to_shadow_page(pte & PT64_BASE_ADDR_MASK);
drop_parent_pte(child, spte);
- }
- return true;
- }
- if (is_mmio_spte(pte))
+ /*
+ * Recursively zap nested TDP SPs, parentless SPs are
+ * unlikely to be used again in the near future. This
+ * avoids retaining a large number of stale nested SPs.
+ */
+ if (tdp_enabled && invalid_list &&
+ child->role.guest_mode && !child->parent_ptes.val)
+ return kvm_mmu_prepare_zap_page(kvm, child,
+ invalid_list);
+ }
+ } else if (is_mmio_spte(pte)) {
mmu_spte_clear_no_track(spte);
-
- return false;
+ }
+ return 0;
}
-static void kvm_mmu_page_unlink_children(struct kvm *kvm,
- struct kvm_mmu_page *sp)
+static int kvm_mmu_page_unlink_children(struct kvm *kvm,
+ struct kvm_mmu_page *sp,
+ struct list_head *invalid_list)
{
+ int zapped = 0;
unsigned i;
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
- mmu_page_zap_pte(kvm, sp, sp->spt + i);
+ zapped += mmu_page_zap_pte(kvm, sp, sp->spt + i, invalid_list);
+
+ return zapped;
}
static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -2692,7 +2312,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
- kvm_mmu_page_unlink_children(kvm, sp);
+ *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
kvm_mmu_unlink_parents(kvm, sp);
/* Zapping children means active_mmu_pages has become unstable. */
@@ -2885,8 +2505,8 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
kvm_mmu_mark_parents_unsync(sp);
}
-static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
- bool can_unsync)
+bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+ bool can_unsync)
{
struct kvm_mmu_page *sp;
@@ -2946,132 +2566,42 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
return false;
}
-static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
-{
- if (pfn_valid(pfn))
- return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
- /*
- * Some reserved pages, such as those from NVDIMM
- * DAX devices, are not for MMIO, and can be mapped
- * with cached memory type for better performance.
- * However, the above check misconceives those pages
- * as MMIO, and results in KVM mapping them with UC
- * memory type, which would hurt the performance.
- * Therefore, we check the host memory type in addition
- * and only treat UC/UC-/WC pages as MMIO.
- */
- (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
-
- return !e820__mapped_raw_any(pfn_to_hpa(pfn),
- pfn_to_hpa(pfn + 1) - 1,
- E820_TYPE_RAM);
-}
-
-/* Bits which may be returned by set_spte() */
-#define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
-#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
-
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned int pte_access, int level,
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
bool can_unsync, bool host_writable)
{
- u64 spte = 0;
- int ret = 0;
+ u64 spte;
struct kvm_mmu_page *sp;
+ int ret;
if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
return 0;
sp = sptep_to_sp(sptep);
- if (sp_ad_disabled(sp))
- spte |= SPTE_AD_DISABLED_MASK;
- else if (kvm_vcpu_ad_need_write_protect(vcpu))
- spte |= SPTE_AD_WRPROT_ONLY_MASK;
-
- /*
- * For the EPT case, shadow_present_mask is 0 if hardware
- * supports exec-only page table entries. In that case,
- * ACC_USER_MASK and shadow_user_mask are used to represent
- * read access. See FNAME(gpte_access) in paging_tmpl.h.
- */
- spte |= shadow_present_mask;
- if (!speculative)
- spte |= spte_shadow_accessed_mask(spte);
- if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
- is_nx_huge_page_enabled()) {
- pte_access &= ~ACC_EXEC_MASK;
- }
+ ret = make_spte(vcpu, pte_access, level, gfn, pfn, *sptep, speculative,
+ can_unsync, host_writable, sp_ad_disabled(sp), &spte);
- if (pte_access & ACC_EXEC_MASK)
- spte |= shadow_x_mask;
- else
- spte |= shadow_nx_mask;
-
- if (pte_access & ACC_USER_MASK)
- spte |= shadow_user_mask;
-
- if (level > PG_LEVEL_4K)
- spte |= PT_PAGE_SIZE_MASK;
- if (tdp_enabled)
- spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn,
- kvm_is_mmio_pfn(pfn));
-
- if (host_writable)
- spte |= SPTE_HOST_WRITEABLE;
- else
- pte_access &= ~ACC_WRITE_MASK;
-
- if (!kvm_is_mmio_pfn(pfn))
- spte |= shadow_me_mask;
-
- spte |= (u64)pfn << PAGE_SHIFT;
-
- if (pte_access & ACC_WRITE_MASK) {
- spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
-
- /*
- * Optimization: for pte sync, if spte was writable the hash
- * lookup is unnecessary (and expensive). Write protection
- * is responsibility of mmu_get_page / kvm_sync_page.
- * Same reasoning can be applied to dirty page accounting.
- */
- if (!can_unsync && is_writable_pte(*sptep))
- goto set_pte;
-
- if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
- pgprintk("%s: found shadow page for %llx, marking ro\n",
- __func__, gfn);
- ret |= SET_SPTE_WRITE_PROTECTED_PT;
- pte_access &= ~ACC_WRITE_MASK;
- spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
- }
- }
-
- if (pte_access & ACC_WRITE_MASK) {
+ if (spte & PT_WRITABLE_MASK)
kvm_vcpu_mark_page_dirty(vcpu, gfn);
- spte |= spte_shadow_dirty_mask(spte);
- }
- if (speculative)
- spte = mark_spte_for_access_track(spte);
-
-set_pte:
- if (mmu_spte_update(sptep, spte))
+ if (*sptep == spte)
+ ret |= SET_SPTE_SPURIOUS;
+ else if (mmu_spte_update(sptep, spte))
ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
return ret;
}
static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
- unsigned int pte_access, int write_fault, int level,
+ unsigned int pte_access, bool write_fault, int level,
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
bool host_writable)
{
int was_rmapped = 0;
int rmap_count;
int set_spte_ret;
- int ret = RET_PF_RETRY;
+ int ret = RET_PF_FIXED;
bool flush = false;
pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
@@ -3113,6 +2643,15 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (unlikely(is_mmio_spte(*sptep)))
ret = RET_PF_EMULATE;
+ /*
+ * The fault is fully spurious if and only if the new SPTE and old SPTE
+ * are identical, and emulation is not required.
+ */
+ if ((set_spte_ret & SET_SPTE_SPURIOUS) && ret == RET_PF_FIXED) {
+ WARN_ON_ONCE(!was_rmapped);
+ return RET_PF_SPURIOUS;
+ }
+
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
trace_kvm_mmu_set_spte(level, gfn, sptep);
if (!was_rmapped && is_large_pte(*sptep))
@@ -3161,7 +2700,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
return -1;
for (i = 0; i < ret; i++, gfn++, start++) {
- mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
+ mmu_set_spte(vcpu, start, access, false, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true);
put_page(pages[i]);
}
@@ -3239,8 +2778,9 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
return level;
}
-static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
- int max_level, kvm_pfn_t *pfnp)
+int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
+ int max_level, kvm_pfn_t *pfnp,
+ bool huge_page_disallowed, int *req_level)
{
struct kvm_memory_slot *slot;
struct kvm_lpage_info *linfo;
@@ -3248,6 +2788,8 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
kvm_pfn_t mask;
int level;
+ *req_level = PG_LEVEL_4K;
+
if (unlikely(max_level == PG_LEVEL_4K))
return PG_LEVEL_4K;
@@ -3272,7 +2814,14 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
if (level == PG_LEVEL_4K)
return level;
- level = min(level, max_level);
+ *req_level = level = min(level, max_level);
+
+ /*
+ * Enforce the iTLB multihit workaround after capturing the requested
+ * level, which will be used to do precise, accurate accounting.
+ */
+ if (huge_page_disallowed)
+ return PG_LEVEL_4K;
/*
* mmu_notifier_retry() was successful and mmu_lock is held, so
@@ -3285,14 +2834,12 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
return level;
}
-static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
- gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
+void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
+ kvm_pfn_t *pfnp, int *goal_levelp)
{
- int level = *levelp;
- u64 spte = *it.sptep;
+ int level = *goal_levelp;
- if (it.level == level && level > PG_LEVEL_4K &&
- is_nx_huge_page_enabled() &&
+ if (cur_level == level && level > PG_LEVEL_4K &&
is_shadow_present_pte(spte) &&
!is_large_pte(spte)) {
/*
@@ -3302,26 +2849,32 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
* patching back for them into pfn the next 9 bits of
* the address.
*/
- u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
+ u64 page_mask = KVM_PAGES_PER_HPAGE(level) -
+ KVM_PAGES_PER_HPAGE(level - 1);
*pfnp |= gfn & page_mask;
- (*levelp)--;
+ (*goal_levelp)--;
}
}
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
int map_writable, int max_level, kvm_pfn_t pfn,
- bool prefault, bool account_disallowed_nx_lpage)
+ bool prefault, bool is_tdp)
{
+ bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
+ bool write = error_code & PFERR_WRITE_MASK;
+ bool exec = error_code & PFERR_FETCH_MASK;
+ bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
- int level, ret;
+ int level, req_level, ret;
gfn_t gfn = gpa >> PAGE_SHIFT;
gfn_t base_gfn = gfn;
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
return RET_PF_RETRY;
- level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn);
+ level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
+ huge_page_disallowed, &req_level);
trace_kvm_mmu_spte_requested(gpa, level, pfn);
for_each_shadow_entry(vcpu, gpa, it) {
@@ -3329,7 +2882,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
* We cannot overwrite existing page tables with an NX
* large page, as the leaf could be executable.
*/
- disallowed_hugepage_adjust(it, gfn, &pfn, &level);
+ if (nx_huge_page_workaround_enabled)
+ disallowed_hugepage_adjust(*it.sptep, gfn, it.level,
+ &pfn, &level);
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == level)
@@ -3341,7 +2896,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
it.level - 1, true, ACC_ALL);
link_shadow_page(vcpu, it.sptep, sp);
- if (account_disallowed_nx_lpage)
+ if (is_tdp && huge_page_disallowed &&
+ req_level >= it.level)
account_huge_nx_page(vcpu->kvm, sp);
}
}
@@ -3349,6 +2905,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
write, level, base_gfn, pfn, prefault,
map_writable);
+ if (ret == RET_PF_SPURIOUS)
+ return ret;
+
direct_pte_prefetch(vcpu, it.sptep);
++vcpu->stat.pf_fixed;
return ret;
@@ -3479,21 +3038,19 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
}
/*
- * Return value:
- * - true: let the vcpu to access on the same address again.
- * - false: let the real page fault path to fix it.
+ * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS.
*/
-static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
- u32 error_code)
+static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ u32 error_code)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
- bool fault_handled = false;
+ int ret = RET_PF_INVALID;
u64 spte = 0ull;
uint retry_count = 0;
if (!page_fault_can_be_fast(error_code))
- return false;
+ return ret;
walk_shadow_page_lockless_begin(vcpu);
@@ -3519,7 +3076,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* they are always ACC_ALL.
*/
if (is_access_allowed(error_code, spte)) {
- fault_handled = true;
+ ret = RET_PF_SPURIOUS;
break;
}
@@ -3562,11 +3119,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* since the gfn is not stable for indirect shadow page. See
* Documentation/virt/kvm/locking.rst to get more detail.
*/
- fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
- iterator.sptep, spte,
- new_spte);
- if (fault_handled)
+ if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte,
+ new_spte)) {
+ ret = RET_PF_FIXED;
break;
+ }
if (++retry_count > 4) {
printk_once(KERN_WARNING
@@ -3577,10 +3134,10 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
} while (true);
trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
- spte, fault_handled);
+ spte, ret);
walk_shadow_page_lockless_end(vcpu);
- return fault_handled;
+ return ret;
}
static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -3592,9 +3149,13 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
return;
sp = to_shadow_page(*root_hpa & PT64_BASE_ADDR_MASK);
- --sp->root_count;
- if (!sp->root_count && sp->role.invalid)
- kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+
+ if (kvm_mmu_put_root(kvm, sp)) {
+ if (sp->tdp_mmu_page)
+ kvm_tdp_mmu_free_root(kvm, sp);
+ else if (sp->role.invalid)
+ kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+ }
*root_hpa = INVALID_PAGE;
}
@@ -3603,6 +3164,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
ulong roots_to_free)
{
+ struct kvm *kvm = vcpu->kvm;
int i;
LIST_HEAD(invalid_list);
bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
@@ -3620,22 +3182,21 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
return;
}
- spin_lock(&vcpu->kvm->mmu_lock);
+ spin_lock(&kvm->mmu_lock);
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
- mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
+ mmu_free_root_page(kvm, &mmu->prev_roots[i].hpa,
&invalid_list);
if (free_active_root) {
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
(mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
- mmu_free_root_page(vcpu->kvm, &mmu->root_hpa,
- &invalid_list);
+ mmu_free_root_page(kvm, &mmu->root_hpa, &invalid_list);
} else {
for (i = 0; i < 4; ++i)
if (mmu->pae_root[i] != 0)
- mmu_free_root_page(vcpu->kvm,
+ mmu_free_root_page(kvm,
&mmu->pae_root[i],
&invalid_list);
mmu->root_hpa = INVALID_PAGE;
@@ -3643,8 +3204,8 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
mmu->root_pgd = 0;
}
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ spin_unlock(&kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
@@ -3684,8 +3245,16 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
hpa_t root;
unsigned i;
- if (shadow_root_level >= PT64_ROOT_4LEVEL) {
- root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level, true);
+ if (vcpu->kvm->arch.tdp_mmu_enabled) {
+ root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
+
+ if (!VALID_PAGE(root))
+ return -ENOSPC;
+ vcpu->arch.mmu->root_hpa = root;
+ } else if (shadow_root_level >= PT64_ROOT_4LEVEL) {
+ root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level,
+ true);
+
if (!VALID_PAGE(root))
return -ENOSPC;
vcpu->arch.mmu->root_hpa = root;
@@ -3910,54 +3479,82 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
return vcpu_match_mmio_gva(vcpu, addr);
}
-/* return true if reserved bit is detected on spte. */
-static bool
-walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+/*
+ * Return the level of the lowest level SPTE added to sptes.
+ * That SPTE may be non-present.
+ */
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
{
struct kvm_shadow_walk_iterator iterator;
- u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
- struct rsvd_bits_validate *rsvd_check;
- int root, leaf;
- bool reserved = false;
+ int leaf = vcpu->arch.mmu->root_level;
+ u64 spte;
- rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
walk_shadow_page_lockless_begin(vcpu);
- for (shadow_walk_init(&iterator, vcpu, addr),
- leaf = root = iterator.level;
+ for (shadow_walk_init(&iterator, vcpu, addr);
shadow_walk_okay(&iterator);
__shadow_walk_next(&iterator, spte)) {
+ leaf = iterator.level;
spte = mmu_spte_get_lockless(iterator.sptep);
sptes[leaf - 1] = spte;
- leaf--;
if (!is_shadow_present_pte(spte))
break;
+ }
+
+ walk_shadow_page_lockless_end(vcpu);
+
+ return leaf;
+}
+
+/* return true if reserved bit is detected on spte. */
+static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+{
+ u64 sptes[PT64_ROOT_MAX_LEVEL];
+ struct rsvd_bits_validate *rsvd_check;
+ int root = vcpu->arch.mmu->root_level;
+ int leaf;
+ int level;
+ bool reserved = false;
+
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
+ *sptep = 0ull;
+ return reserved;
+ }
+
+ if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
+ leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+ else
+ leaf = get_walk(vcpu, addr, sptes);
+
+ rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
+
+ for (level = root; level >= leaf; level--) {
+ if (!is_shadow_present_pte(sptes[level - 1]))
+ break;
/*
* Use a bitwise-OR instead of a logical-OR to aggregate the
* reserved bit and EPT's invalid memtype/XWR checks to avoid
* adding a Jcc in the loop.
*/
- reserved |= __is_bad_mt_xwr(rsvd_check, spte) |
- __is_rsvd_bits_set(rsvd_check, spte, iterator.level);
+ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
+ __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
+ level);
}
- walk_shadow_page_lockless_end(vcpu);
-
if (reserved) {
pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
__func__, addr);
- while (root > leaf) {
+ for (level = root; level >= leaf; level--)
pr_err("------ spte 0x%llx level %d.\n",
- sptes[root - 1], root);
- root--;
- }
+ sptes[level - 1], level);
}
- *sptep = spte;
+ *sptep = sptes[leaf - 1];
+
return reserved;
}
@@ -3969,7 +3566,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
if (mmio_info_in_cache(vcpu, addr, direct))
return RET_PF_EMULATE;
- reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+ reserved = get_mmio_spte(vcpu, addr, &spte);
if (WARN_ON(reserved))
return -EINVAL;
@@ -4080,8 +3677,6 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
bool prefault, int max_level, bool is_tdp)
{
bool write = error_code & PFERR_WRITE_MASK;
- bool exec = error_code & PFERR_FETCH_MASK;
- bool lpage_disallowed = exec && is_nx_huge_page_enabled();
bool map_writable;
gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -4092,16 +3687,16 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
- if (fast_page_fault(vcpu, gpa, error_code))
- return RET_PF_RETRY;
+ if (!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) {
+ r = fast_page_fault(vcpu, gpa, error_code);
+ if (r != RET_PF_INVALID)
+ return r;
+ }
r = mmu_topup_memory_caches(vcpu, false);
if (r)
return r;
- if (lpage_disallowed)
- max_level = PG_LEVEL_4K;
-
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
@@ -4118,8 +3713,13 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
r = make_mmu_pages_available(vcpu);
if (r)
goto out_unlock;
- r = __direct_map(vcpu, gpa, write, map_writable, max_level, pfn,
- prefault, is_tdp && lpage_disallowed);
+
+ if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
+ r = kvm_tdp_mmu_map(vcpu, gpa, error_code, map_writable, max_level,
+ pfn, prefault);
+ else
+ r = __direct_map(vcpu, gpa, error_code, map_writable, max_level, pfn,
+ prefault, is_tdp);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -4292,7 +3892,13 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
*/
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
- __clear_sp_write_flooding_count(to_shadow_page(vcpu->arch.mmu->root_hpa));
+ /*
+ * If this is a direct root page, it doesn't have a write flooding
+ * count. Otherwise, clear the write flooding count.
+ */
+ if (!new_role.direct)
+ __clear_sp_write_flooding_count(
+ to_shadow_page(vcpu->arch.mmu->root_hpa));
}
void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
@@ -5400,7 +5006,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
entry = *spte;
- mmu_page_zap_pte(vcpu->kvm, sp, spte);
+ mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
if (gentry &&
!((sp->role.word ^ base_role) & ~role_ign.word) &&
rmap_can_add(vcpu))
@@ -5450,13 +5056,14 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
if (r == RET_PF_INVALID) {
r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
lower_32_bits(error_code), false);
- WARN_ON(r == RET_PF_INVALID);
+ if (WARN_ON_ONCE(r == RET_PF_INVALID))
+ return -EIO;
}
- if (r == RET_PF_RETRY)
- return 1;
if (r < 0)
return r;
+ if (r != RET_PF_EMULATE)
+ return 1;
/*
* Before emulating the instruction, check if the error code
@@ -5485,18 +5092,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
emulate:
- /*
- * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
- * This can happen if a guest gets a page-fault on data access but the HW
- * table walker is not able to read the instruction page (e.g instruction
- * page is not present in memory). In those cases we simply restart the
- * guest, with the exception of AMD Erratum 1096 which is unrecoverable.
- */
- if (unlikely(insn && !insn_len)) {
- if (!kvm_x86_ops.need_emulation_on_page_fault(vcpu))
- return 1;
- }
-
return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
insn_len);
}
@@ -5682,11 +5277,17 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
free_page((unsigned long)mmu->lm_root);
}
-static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
struct page *page;
int i;
+ mmu->root_hpa = INVALID_PAGE;
+ mmu->root_pgd = 0;
+ mmu->translate_gpa = translate_gpa;
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
/*
* When using PAE paging, the four PDPTEs are treated as 'root' pages,
* while the PDP table is a per-vCPU construct that's allocated at MMU
@@ -5712,7 +5313,6 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
- uint i;
int ret;
vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
@@ -5726,25 +5326,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu = &vcpu->arch.root_mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
- vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
- vcpu->arch.root_mmu.root_pgd = 0;
- vcpu->arch.root_mmu.translate_gpa = translate_gpa;
- for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-
- vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
- vcpu->arch.guest_mmu.root_pgd = 0;
- vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
- for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
- ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu);
+ ret = __kvm_mmu_create(vcpu, &vcpu->arch.guest_mmu);
if (ret)
return ret;
- ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu);
+ ret = __kvm_mmu_create(vcpu, &vcpu->arch.root_mmu);
if (ret)
goto fail_allocate_root;
@@ -5841,6 +5429,10 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
kvm_reload_remote_mmus(kvm);
kvm_zap_obsolete_pages(kvm);
+
+ if (kvm->arch.tdp_mmu_enabled)
+ kvm_tdp_mmu_zap_all(kvm);
+
spin_unlock(&kvm->mmu_lock);
}
@@ -5860,6 +5452,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+ kvm_mmu_init_tdp_mmu(kvm);
+
node->track_write = kvm_mmu_pte_write;
node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
kvm_page_track_register_notifier(kvm, node);
@@ -5870,6 +5464,8 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
kvm_page_track_unregister_notifier(kvm, node);
+
+ kvm_mmu_uninit_tdp_mmu(kvm);
}
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
@@ -5877,6 +5473,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i;
+ bool flush;
spin_lock(&kvm->mmu_lock);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
@@ -5896,6 +5493,12 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
}
}
+ if (kvm->arch.tdp_mmu_enabled) {
+ flush = kvm_tdp_mmu_zap_gfn_range(kvm, gfn_start, gfn_end);
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+ }
+
spin_unlock(&kvm->mmu_lock);
}
@@ -5914,6 +5517,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
+ if (kvm->arch.tdp_mmu_enabled)
+ flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K);
spin_unlock(&kvm->mmu_lock);
/*
@@ -5977,6 +5582,9 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
kvm_mmu_zap_collapsible_spte, true);
+
+ if (kvm->arch.tdp_mmu_enabled)
+ kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
}
@@ -6002,6 +5610,8 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
+ if (kvm->arch.tdp_mmu_enabled)
+ flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
/*
@@ -6023,6 +5633,8 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
false);
+ if (kvm->arch.tdp_mmu_enabled)
+ flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
spin_unlock(&kvm->mmu_lock);
if (flush)
@@ -6037,6 +5649,8 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
+ if (kvm->arch.tdp_mmu_enabled)
+ flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
if (flush)
@@ -6062,6 +5676,10 @@ restart:
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
+
+ if (kvm->arch.tdp_mmu_enabled)
+ kvm_tdp_mmu_zap_all(kvm);
+
spin_unlock(&kvm->mmu_lock);
}
@@ -6357,7 +5975,10 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
- while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
+ for ( ; to_zap; --to_zap) {
+ if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages))
+ break;
+
/*
* We use a separate list instead of just using active_mmu_pages
* because the number of lpage_disallowed pages is expected to
@@ -6367,15 +5988,20 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
struct kvm_mmu_page,
lpage_disallowed_link);
WARN_ON_ONCE(!sp->lpage_disallowed);
- kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- WARN_ON_ONCE(sp->lpage_disallowed);
+ if (sp->tdp_mmu_page)
+ kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
+ sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+ else {
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ WARN_ON_ONCE(sp->lpage_disallowed);
+ }
- if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
kvm_mmu_commit_zap_page(kvm, &invalid_list);
- if (to_zap)
- cond_resched_lock(&kvm->mmu_lock);
+ cond_resched_lock(&kvm->mmu_lock);
}
}
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, rcu_idx);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 3acf3b8eb469..bfc6389edc28 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -3,9 +3,23 @@
#define __KVM_X86_MMU_INTERNAL_H
#include <linux/types.h>
-
+#include <linux/kvm_host.h>
#include <asm/kvm_host.h>
+#undef MMU_DEBUG
+
+#ifdef MMU_DEBUG
+extern bool dbg;
+
+#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
+#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
+#define MMU_WARN_ON(x) WARN_ON(x)
+#else
+#define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
+#define MMU_WARN_ON(x) do { } while (0)
+#endif
+
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
@@ -41,8 +55,12 @@ struct kvm_mmu_page {
/* Number of writes since the last time traversal visited this page. */
atomic_t write_flooding_count;
+
+ bool tdp_mmu_page;
};
+extern struct kmem_cache *mmu_page_header_cache;
+
static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -55,9 +73,77 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
return to_shadow_page(__pa(sptep));
}
+static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
+{
+ /*
+ * When using the EPT page-modification log, the GPAs in the log
+ * would come from L2 rather than L1. Therefore, we need to rely
+ * on write protection to record dirty pages. This also bypasses
+ * PML, since writes now result in a vmexit.
+ */
+ return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
+}
+
+bool is_nx_huge_page_enabled(void);
+bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+ bool can_unsync);
+
void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
+void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
+ u64 start_gfn, u64 pages);
+
+static inline void kvm_mmu_get_root(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ BUG_ON(!sp->root_count);
+ lockdep_assert_held(&kvm->mmu_lock);
+
+ ++sp->root_count;
+}
+
+static inline bool kvm_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ lockdep_assert_held(&kvm->mmu_lock);
+ --sp->root_count;
+
+ return !sp->root_count;
+}
+
+/*
+ * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault().
+ *
+ * RET_PF_RETRY: let CPU fault again on the address.
+ * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
+ * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
+ * RET_PF_FIXED: The faulting entry has been fixed.
+ * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
+ */
+enum {
+ RET_PF_RETRY = 0,
+ RET_PF_EMULATE,
+ RET_PF_INVALID,
+ RET_PF_FIXED,
+ RET_PF_SPURIOUS,
+};
+
+/* Bits which may be returned by set_spte() */
+#define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
+#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
+#define SET_SPTE_SPURIOUS BIT(2)
+
+int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
+ int max_level, kvm_pfn_t *pfnp,
+ bool huge_page_disallowed, int *req_level);
+void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
+ kvm_pfn_t *pfnp, int *goal_levelp);
+
+bool is_nx_huge_page_enabled(void);
+
+void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
+
+void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
#endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index 9d15bc0c535b..213699b27b44 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -202,8 +202,8 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page,
TRACE_EVENT(
mark_mmio_spte,
- TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access, unsigned int gen),
- TP_ARGS(sptep, gfn, access, gen),
+ TP_PROTO(u64 *sptep, gfn_t gfn, u64 spte),
+ TP_ARGS(sptep, gfn, spte),
TP_STRUCT__entry(
__field(void *, sptep)
@@ -215,8 +215,8 @@ TRACE_EVENT(
TP_fast_assign(
__entry->sptep = sptep;
__entry->gfn = gfn;
- __entry->access = access;
- __entry->gen = gen;
+ __entry->access = spte & ACC_ALL;
+ __entry->gen = get_mmio_spte_generation(spte);
),
TP_printk("sptep:%p gfn %llx access %x gen %x", __entry->sptep,
@@ -244,14 +244,11 @@ TRACE_EVENT(
__entry->access)
);
-#define __spte_satisfied(__spte) \
- (__entry->retry && is_writable_pte(__entry->__spte))
-
TRACE_EVENT(
fast_page_fault,
TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
- u64 *sptep, u64 old_spte, bool retry),
- TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),
+ u64 *sptep, u64 old_spte, int ret),
+ TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, ret),
TP_STRUCT__entry(
__field(int, vcpu_id)
@@ -260,7 +257,7 @@ TRACE_EVENT(
__field(u64 *, sptep)
__field(u64, old_spte)
__field(u64, new_spte)
- __field(bool, retry)
+ __field(int, ret)
),
TP_fast_assign(
@@ -270,7 +267,7 @@ TRACE_EVENT(
__entry->sptep = sptep;
__entry->old_spte = old_spte;
__entry->new_spte = *sptep;
- __entry->retry = retry;
+ __entry->ret = ret;
),
TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
@@ -278,7 +275,7 @@ TRACE_EVENT(
__entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
kvm_mmu_trace_pferr_flags), __entry->sptep,
__entry->old_spte, __entry->new_spte,
- __spte_satisfied(old_spte), __spte_satisfied(new_spte)
+ __entry->ret == RET_PF_SPURIOUS, __entry->ret == RET_PF_FIXED
)
);
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index a84a141a2ad2..8443a675715b 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -229,7 +229,8 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
return;
idx = srcu_read_lock(&head->track_srcu);
- hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
+ srcu_read_lock_held(&head->track_srcu))
if (n->track_write)
n->track_write(vcpu, gpa, new, bytes, n);
srcu_read_unlock(&head->track_srcu, idx);
@@ -254,7 +255,8 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
return;
idx = srcu_read_lock(&head->track_srcu);
- hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
+ srcu_read_lock_held(&head->track_srcu))
if (n->track_flush_slot)
n->track_flush_slot(kvm, slot, n);
srcu_read_unlock(&head->track_srcu, idx);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 4dd6b1e5b8cf..50e268eb8e1a 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -550,7 +550,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* we call mmu_set_spte() with host_writable = true because
* pte_prefetch_gfn_to_pfn always gets a writable pfn.
*/
- mmu_set_spte(vcpu, spte, pte_access, 0, PG_LEVEL_4K, gfn, pfn,
+ mmu_set_spte(vcpu, spte, pte_access, false, PG_LEVEL_4K, gfn, pfn,
true, true);
kvm_release_pfn_clean(pfn);
@@ -625,15 +625,18 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
* emulate this operation, return 1 to indicate this case.
*/
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
- struct guest_walker *gw,
- int write_fault, int max_level,
- kvm_pfn_t pfn, bool map_writable, bool prefault,
- bool lpage_disallowed)
+ struct guest_walker *gw, u32 error_code,
+ int max_level, kvm_pfn_t pfn, bool map_writable,
+ bool prefault)
{
+ bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
+ bool write_fault = error_code & PFERR_WRITE_MASK;
+ bool exec = error_code & PFERR_FETCH_MASK;
+ bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
- int top_level, hlevel, ret;
+ int top_level, level, req_level, ret;
gfn_t base_gfn = gw->gfn;
direct_access = gw->pte_access;
@@ -679,7 +682,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
- hlevel = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, max_level, &pfn);
+ level = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, max_level, &pfn,
+ huge_page_disallowed, &req_level);
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
@@ -690,10 +694,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
* We cannot overwrite existing page tables with an NX
* large page, as the leaf could be executable.
*/
- disallowed_hugepage_adjust(it, gw->gfn, &pfn, &hlevel);
+ if (nx_huge_page_workaround_enabled)
+ disallowed_hugepage_adjust(*it.sptep, gw->gfn, it.level,
+ &pfn, &level);
base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
- if (it.level == hlevel)
+ if (it.level == level)
break;
validate_direct_spte(vcpu, it.sptep, direct_access);
@@ -704,13 +710,16 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
it.level - 1, true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
- if (lpage_disallowed)
+ if (huge_page_disallowed && req_level >= it.level)
account_huge_nx_page(vcpu->kvm, sp);
}
}
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
it.level, base_gfn, pfn, prefault, map_writable);
+ if (ret == RET_PF_SPURIOUS)
+ return ret;
+
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
++vcpu->stat.pf_fixed;
return ret;
@@ -738,7 +747,7 @@ out_gpte_changed:
*/
static bool
FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
- struct guest_walker *walker, int user_fault,
+ struct guest_walker *walker, bool user_fault,
bool *write_fault_to_shadow_pgtable)
{
int level;
@@ -776,15 +785,13 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
bool prefault)
{
- int write_fault = error_code & PFERR_WRITE_MASK;
- int user_fault = error_code & PFERR_USER_MASK;
+ bool write_fault = error_code & PFERR_WRITE_MASK;
+ bool user_fault = error_code & PFERR_USER_MASK;
struct guest_walker walker;
int r;
kvm_pfn_t pfn;
unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
- bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
- is_nx_huge_page_enabled();
int max_level;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -825,7 +832,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
&walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
- if (lpage_disallowed || is_self_change_mapping)
+ if (is_self_change_mapping)
max_level = PG_LEVEL_4K;
else
max_level = walker.level;
@@ -869,8 +876,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
r = make_mmu_pages_available(vcpu);
if (r)
goto out_unlock;
- r = FNAME(fetch)(vcpu, addr, &walker, write_fault, max_level, pfn,
- map_writable, prefault, lpage_disallowed);
+ r = FNAME(fetch)(vcpu, addr, &walker, error_code, max_level, pfn,
+ map_writable, prefault);
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
out_unlock:
@@ -895,6 +902,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
+ u64 old_spte;
int level;
u64 *sptep;
@@ -917,7 +925,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
sptep = iterator.sptep;
sp = sptep_to_sp(sptep);
- if (is_last_spte(*sptep, level)) {
+ old_spte = *sptep;
+ if (is_last_spte(old_spte, level)) {
pt_element_t gpte;
gpa_t pte_gpa;
@@ -927,7 +936,8 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
pte_gpa = FNAME(get_level1_sp_gpa)(sp);
pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
- if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))
+ mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL);
+ if (is_shadow_present_pte(old_spte))
kvm_flush_remote_tlbs_with_address(vcpu->kvm,
sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
new file mode 100644
index 000000000000..fcac2cac78fe
--- /dev/null
+++ b/arch/x86/kvm/mmu/spte.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * Macros and functions to access KVM PTEs (also known as SPTEs)
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2020 Red Hat, Inc. and/or its affiliates.
+ */
+
+
+#include <linux/kvm_host.h>
+#include "mmu.h"
+#include "mmu_internal.h"
+#include "x86.h"
+#include "spte.h"
+
+#include <asm/e820/api.h>
+
+u64 __read_mostly shadow_nx_mask;
+u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
+u64 __read_mostly shadow_user_mask;
+u64 __read_mostly shadow_accessed_mask;
+u64 __read_mostly shadow_dirty_mask;
+u64 __read_mostly shadow_mmio_value;
+u64 __read_mostly shadow_mmio_access_mask;
+u64 __read_mostly shadow_present_mask;
+u64 __read_mostly shadow_me_mask;
+u64 __read_mostly shadow_acc_track_mask;
+
+u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
+u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
+
+u8 __read_mostly shadow_phys_bits;
+
+static u64 generation_mmio_spte_mask(u64 gen)
+{
+ u64 mask;
+
+ WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+ BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
+
+ mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
+ mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
+ return mask;
+}
+
+u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
+{
+ u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK;
+ u64 mask = generation_mmio_spte_mask(gen);
+ u64 gpa = gfn << PAGE_SHIFT;
+
+ access &= shadow_mmio_access_mask;
+ mask |= shadow_mmio_value | access;
+ mask |= gpa | shadow_nonpresent_or_rsvd_mask;
+ mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
+ << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
+
+ return mask;
+}
+
+static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
+{
+ if (pfn_valid(pfn))
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
+ /*
+ * Some reserved pages, such as those from NVDIMM
+ * DAX devices, are not for MMIO, and can be mapped
+ * with cached memory type for better performance.
+ * However, the above check misconceives those pages
+ * as MMIO, and results in KVM mapping them with UC
+ * memory type, which would hurt the performance.
+ * Therefore, we check the host memory type in addition
+ * and only treat UC/UC-/WC pages as MMIO.
+ */
+ (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
+
+ return !e820__mapped_raw_any(pfn_to_hpa(pfn),
+ pfn_to_hpa(pfn + 1) - 1,
+ E820_TYPE_RAM);
+}
+
+int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
+ gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool speculative,
+ bool can_unsync, bool host_writable, bool ad_disabled,
+ u64 *new_spte)
+{
+ u64 spte = 0;
+ int ret = 0;
+
+ if (ad_disabled)
+ spte |= SPTE_AD_DISABLED_MASK;
+ else if (kvm_vcpu_ad_need_write_protect(vcpu))
+ spte |= SPTE_AD_WRPROT_ONLY_MASK;
+
+ /*
+ * For the EPT case, shadow_present_mask is 0 if hardware
+ * supports exec-only page table entries. In that case,
+ * ACC_USER_MASK and shadow_user_mask are used to represent
+ * read access. See FNAME(gpte_access) in paging_tmpl.h.
+ */
+ spte |= shadow_present_mask;
+ if (!speculative)
+ spte |= spte_shadow_accessed_mask(spte);
+
+ if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
+ is_nx_huge_page_enabled()) {
+ pte_access &= ~ACC_EXEC_MASK;
+ }
+
+ if (pte_access & ACC_EXEC_MASK)
+ spte |= shadow_x_mask;
+ else
+ spte |= shadow_nx_mask;
+
+ if (pte_access & ACC_USER_MASK)
+ spte |= shadow_user_mask;
+
+ if (level > PG_LEVEL_4K)
+ spte |= PT_PAGE_SIZE_MASK;
+ if (tdp_enabled)
+ spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn,
+ kvm_is_mmio_pfn(pfn));
+
+ if (host_writable)
+ spte |= SPTE_HOST_WRITEABLE;
+ else
+ pte_access &= ~ACC_WRITE_MASK;
+
+ if (!kvm_is_mmio_pfn(pfn))
+ spte |= shadow_me_mask;
+
+ spte |= (u64)pfn << PAGE_SHIFT;
+
+ if (pte_access & ACC_WRITE_MASK) {
+ spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
+
+ /*
+ * Optimization: for pte sync, if spte was writable the hash
+ * lookup is unnecessary (and expensive). Write protection
+ * is responsibility of mmu_get_page / kvm_sync_page.
+ * Same reasoning can be applied to dirty page accounting.
+ */
+ if (!can_unsync && is_writable_pte(old_spte))
+ goto out;
+
+ if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
+ pgprintk("%s: found shadow page for %llx, marking ro\n",
+ __func__, gfn);
+ ret |= SET_SPTE_WRITE_PROTECTED_PT;
+ pte_access &= ~ACC_WRITE_MASK;
+ spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
+ }
+ }
+
+ if (pte_access & ACC_WRITE_MASK)
+ spte |= spte_shadow_dirty_mask(spte);
+
+ if (speculative)
+ spte = mark_spte_for_access_track(spte);
+
+out:
+ *new_spte = spte;
+ return ret;
+}
+
+u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled)
+{
+ u64 spte;
+
+ spte = __pa(child_pt) | shadow_present_mask | PT_WRITABLE_MASK |
+ shadow_user_mask | shadow_x_mask | shadow_me_mask;
+
+ if (ad_disabled)
+ spte |= SPTE_AD_DISABLED_MASK;
+ else
+ spte |= shadow_accessed_mask;
+
+ return spte;
+}
+
+u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn)
+{
+ u64 new_spte;
+
+ new_spte = old_spte & ~PT64_BASE_ADDR_MASK;
+ new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+ new_spte &= ~PT_WRITABLE_MASK;
+ new_spte &= ~SPTE_HOST_WRITEABLE;
+
+ new_spte = mark_spte_for_access_track(new_spte);
+
+ return new_spte;
+}
+
+static u8 kvm_get_shadow_phys_bits(void)
+{
+ /*
+ * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
+ * in CPU detection code, but the processor treats those reduced bits as
+ * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
+ * the physical address bits reported by CPUID.
+ */
+ if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
+ return cpuid_eax(0x80000008) & 0xff;
+
+ /*
+ * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
+ * custom CPUID. Proceed with whatever the kernel found since these features
+ * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
+ */
+ return boot_cpu_data.x86_phys_bits;
+}
+
+u64 mark_spte_for_access_track(u64 spte)
+{
+ if (spte_ad_enabled(spte))
+ return spte & ~shadow_accessed_mask;
+
+ if (is_access_track_spte(spte))
+ return spte;
+
+ /*
+ * Making an Access Tracking PTE will result in removal of write access
+ * from the PTE. So, verify that we will be able to restore the write
+ * access in the fast page fault path later on.
+ */
+ WARN_ONCE((spte & PT_WRITABLE_MASK) &&
+ !spte_can_locklessly_be_made_writable(spte),
+ "kvm: Writable SPTE is not locklessly dirty-trackable\n");
+
+ WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK <<
+ SHADOW_ACC_TRACK_SAVED_BITS_SHIFT),
+ "kvm: Access Tracking saved bit locations are not zero\n");
+
+ spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) <<
+ SHADOW_ACC_TRACK_SAVED_BITS_SHIFT;
+ spte &= ~shadow_acc_track_mask;
+
+ return spte;
+}
+
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask)
+{
+ BUG_ON((u64)(unsigned)access_mask != access_mask);
+ WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN));
+ WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
+ shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
+ shadow_mmio_access_mask = access_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+
+/*
+ * Sets the shadow PTE masks used by the MMU.
+ *
+ * Assumptions:
+ * - Setting either @accessed_mask or @dirty_mask requires setting both
+ * - At least one of @accessed_mask or @acc_track_mask must be set
+ */
+void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
+ u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
+ u64 acc_track_mask, u64 me_mask)
+{
+ BUG_ON(!dirty_mask != !accessed_mask);
+ BUG_ON(!accessed_mask && !acc_track_mask);
+ BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK);
+
+ shadow_user_mask = user_mask;
+ shadow_accessed_mask = accessed_mask;
+ shadow_dirty_mask = dirty_mask;
+ shadow_nx_mask = nx_mask;
+ shadow_x_mask = x_mask;
+ shadow_present_mask = p_mask;
+ shadow_acc_track_mask = acc_track_mask;
+ shadow_me_mask = me_mask;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+
+void kvm_mmu_reset_all_pte_masks(void)
+{
+ u8 low_phys_bits;
+
+ shadow_user_mask = 0;
+ shadow_accessed_mask = 0;
+ shadow_dirty_mask = 0;
+ shadow_nx_mask = 0;
+ shadow_x_mask = 0;
+ shadow_present_mask = 0;
+ shadow_acc_track_mask = 0;
+
+ shadow_phys_bits = kvm_get_shadow_phys_bits();
+
+ /*
+ * If the CPU has 46 or less physical address bits, then set an
+ * appropriate mask to guard against L1TF attacks. Otherwise, it is
+ * assumed that the CPU is not vulnerable to L1TF.
+ *
+ * Some Intel CPUs address the L1 cache using more PA bits than are
+ * reported by CPUID. Use the PA width of the L1 cache when possible
+ * to achieve more effective mitigation, e.g. if system RAM overlaps
+ * the most significant bits of legal physical address space.
+ */
+ shadow_nonpresent_or_rsvd_mask = 0;
+ low_phys_bits = boot_cpu_data.x86_phys_bits;
+ if (boot_cpu_has_bug(X86_BUG_L1TF) &&
+ !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
+ 52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) {
+ low_phys_bits = boot_cpu_data.x86_cache_bits
+ - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
+ shadow_nonpresent_or_rsvd_mask =
+ rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);
+ }
+
+ shadow_nonpresent_or_rsvd_lower_gfn_mask =
+ GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
+}
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
new file mode 100644
index 000000000000..5c75a451c000
--- /dev/null
+++ b/arch/x86/kvm/mmu/spte.h
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifndef KVM_X86_MMU_SPTE_H
+#define KVM_X86_MMU_SPTE_H
+
+#include "mmu_internal.h"
+
+#define PT_FIRST_AVAIL_BITS_SHIFT 10
+#define PT64_SECOND_AVAIL_BITS_SHIFT 54
+
+/*
+ * The mask used to denote special SPTEs, which can be either MMIO SPTEs or
+ * Access Tracking SPTEs.
+ */
+#define SPTE_SPECIAL_MASK (3ULL << 52)
+#define SPTE_AD_ENABLED_MASK (0ULL << 52)
+#define SPTE_AD_DISABLED_MASK (1ULL << 52)
+#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52)
+#define SPTE_MMIO_MASK (3ULL << 52)
+
+#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
+#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1))
+#else
+#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#endif
+#define PT64_LVL_ADDR_MASK(level) \
+ (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+ * PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+ (PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+ * PT64_LEVEL_BITS))) - 1))
+
+#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
+ | shadow_x_mask | shadow_nx_mask | shadow_me_mask)
+
+#define ACC_EXEC_MASK 1
+#define ACC_WRITE_MASK PT_WRITABLE_MASK
+#define ACC_USER_MASK PT_USER_MASK
+#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+
+/* The mask for the R/X bits in EPT PTEs */
+#define PT64_EPT_READABLE_MASK 0x1ull
+#define PT64_EPT_EXECUTABLE_MASK 0x4ull
+
+#define PT64_LEVEL_BITS 9
+
+#define PT64_LEVEL_SHIFT(level) \
+ (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)
+
+#define PT64_INDEX(address, level)\
+ (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))
+#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+
+
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+
+/*
+ * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * the memslots generation and is derived as follows:
+ *
+ * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
+ * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ *
+ * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
+ * the MMIO generation number, as doing so would require stealing a bit from
+ * the "real" generation number and thus effectively halve the maximum number
+ * of MMIO generations that can be handled before encountering a wrap (which
+ * requires a full MMU zap). The flag is instead explicitly queried when
+ * checking for MMIO spte cache hits.
+ */
+#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0)
+
+#define MMIO_SPTE_GEN_LOW_START 3
+#define MMIO_SPTE_GEN_LOW_END 11
+#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+ MMIO_SPTE_GEN_LOW_START)
+
+#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
+#define MMIO_SPTE_GEN_HIGH_END 62
+#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
+ MMIO_SPTE_GEN_HIGH_START)
+
+extern u64 __read_mostly shadow_nx_mask;
+extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
+extern u64 __read_mostly shadow_user_mask;
+extern u64 __read_mostly shadow_accessed_mask;
+extern u64 __read_mostly shadow_dirty_mask;
+extern u64 __read_mostly shadow_mmio_value;
+extern u64 __read_mostly shadow_mmio_access_mask;
+extern u64 __read_mostly shadow_present_mask;
+extern u64 __read_mostly shadow_me_mask;
+
+/*
+ * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
+ * shadow_acc_track_mask is the set of bits to be cleared in non-accessed
+ * pages.
+ */
+extern u64 __read_mostly shadow_acc_track_mask;
+
+/*
+ * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order
+ * to guard against L1TF attacks.
+ */
+extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
+
+/*
+ * The number of high-order 1 bits to use in the mask above.
+ */
+#define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5
+
+/*
+ * The mask/shift to use for saving the original R/X bits when marking the PTE
+ * as not-present for access tracking purposes. We do not save the W bit as the
+ * PTEs being access tracked also need to be dirty tracked, so the W bit will be
+ * restored only when a write is attempted to the page.
+ */
+#define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \
+ PT64_EPT_EXECUTABLE_MASK)
+#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT
+
+/*
+ * In some cases, we need to preserve the GFN of a non-present or reserved
+ * SPTE when we usurp the upper five bits of the physical address space to
+ * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll
+ * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask
+ * left into the reserved bits, i.e. the GFN in the SPTE will be split into
+ * high and low parts. This mask covers the lower bits of the GFN.
+ */
+extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
+
+/*
+ * The number of non-reserved physical address bits irrespective of features
+ * that repurpose legal bits, e.g. MKTME.
+ */
+extern u8 __read_mostly shadow_phys_bits;
+
+static inline bool is_mmio_spte(u64 spte)
+{
+ return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK;
+}
+
+static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
+{
+ return sp->role.ad_disabled;
+}
+
+static inline bool spte_ad_enabled(u64 spte)
+{
+ MMU_WARN_ON(is_mmio_spte(spte));
+ return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK;
+}
+
+static inline bool spte_ad_need_write_protect(u64 spte)
+{
+ MMU_WARN_ON(is_mmio_spte(spte));
+ return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
+}
+
+static inline u64 spte_shadow_accessed_mask(u64 spte)
+{
+ MMU_WARN_ON(is_mmio_spte(spte));
+ return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
+}
+
+static inline u64 spte_shadow_dirty_mask(u64 spte)
+{
+ MMU_WARN_ON(is_mmio_spte(spte));
+ return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
+}
+
+static inline bool is_access_track_spte(u64 spte)
+{
+ return !spte_ad_enabled(spte) && (spte & shadow_acc_track_mask) == 0;
+}
+
+static inline int is_shadow_present_pte(u64 pte)
+{
+ return (pte != 0) && !is_mmio_spte(pte);
+}
+
+static inline int is_large_pte(u64 pte)
+{
+ return pte & PT_PAGE_SIZE_MASK;
+}
+
+static inline int is_last_spte(u64 pte, int level)
+{
+ if (level == PG_LEVEL_4K)
+ return 1;
+ if (is_large_pte(pte))
+ return 1;
+ return 0;
+}
+
+static inline bool is_executable_pte(u64 spte)
+{
+ return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
+}
+
+static inline kvm_pfn_t spte_to_pfn(u64 pte)
+{
+ return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+}
+
+static inline bool is_accessed_spte(u64 spte)
+{
+ u64 accessed_mask = spte_shadow_accessed_mask(spte);
+
+ return accessed_mask ? spte & accessed_mask
+ : !is_access_track_spte(spte);
+}
+
+static inline bool is_dirty_spte(u64 spte)
+{
+ u64 dirty_mask = spte_shadow_dirty_mask(spte);
+
+ return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
+}
+
+static inline bool spte_can_locklessly_be_made_writable(u64 spte)
+{
+ return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
+ (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
+}
+
+static inline u64 get_mmio_spte_generation(u64 spte)
+{
+ u64 gen;
+
+ gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
+ gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
+ return gen;
+}
+
+/* Bits which may be returned by set_spte() */
+#define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
+#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
+#define SET_SPTE_SPURIOUS BIT(2)
+
+int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
+ gfn_t gfn, kvm_pfn_t pfn, u64 old_spte, bool speculative,
+ bool can_unsync, bool host_writable, bool ad_disabled,
+ u64 *new_spte);
+u64 make_nonleaf_spte(u64 *child_pt, bool ad_disabled);
+u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access);
+u64 mark_spte_for_access_track(u64 spte);
+u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn);
+
+void kvm_mmu_reset_all_pte_masks(void);
+
+#endif
diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
new file mode 100644
index 000000000000..87b7e16911db
--- /dev/null
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "mmu_internal.h"
+#include "tdp_iter.h"
+#include "spte.h"
+
+/*
+ * Recalculates the pointer to the SPTE for the current GFN and level and
+ * reread the SPTE.
+ */
+static void tdp_iter_refresh_sptep(struct tdp_iter *iter)
+{
+ iter->sptep = iter->pt_path[iter->level - 1] +
+ SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level);
+ iter->old_spte = READ_ONCE(*iter->sptep);
+}
+
+static gfn_t round_gfn_for_level(gfn_t gfn, int level)
+{
+ return gfn & -KVM_PAGES_PER_HPAGE(level);
+}
+
+/*
+ * Sets a TDP iterator to walk a pre-order traversal of the paging structure
+ * rooted at root_pt, starting with the walk to translate goal_gfn.
+ */
+void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+ int min_level, gfn_t goal_gfn)
+{
+ WARN_ON(root_level < 1);
+ WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+
+ iter->goal_gfn = goal_gfn;
+ iter->root_level = root_level;
+ iter->min_level = min_level;
+ iter->level = root_level;
+ iter->pt_path[iter->level - 1] = root_pt;
+
+ iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level);
+ tdp_iter_refresh_sptep(iter);
+
+ iter->valid = true;
+}
+
+/*
+ * Given an SPTE and its level, returns a pointer containing the host virtual
+ * address of the child page table referenced by the SPTE. Returns null if
+ * there is no such entry.
+ */
+u64 *spte_to_child_pt(u64 spte, int level)
+{
+ /*
+ * There's no child entry if this entry isn't present or is a
+ * last-level entry.
+ */
+ if (!is_shadow_present_pte(spte) || is_last_spte(spte, level))
+ return NULL;
+
+ return __va(spte_to_pfn(spte) << PAGE_SHIFT);
+}
+
+/*
+ * Steps down one level in the paging structure towards the goal GFN. Returns
+ * true if the iterator was able to step down a level, false otherwise.
+ */
+static bool try_step_down(struct tdp_iter *iter)
+{
+ u64 *child_pt;
+
+ if (iter->level == iter->min_level)
+ return false;
+
+ /*
+ * Reread the SPTE before stepping down to avoid traversing into page
+ * tables that are no longer linked from this entry.
+ */
+ iter->old_spte = READ_ONCE(*iter->sptep);
+
+ child_pt = spte_to_child_pt(iter->old_spte, iter->level);
+ if (!child_pt)
+ return false;
+
+ iter->level--;
+ iter->pt_path[iter->level - 1] = child_pt;
+ iter->gfn = round_gfn_for_level(iter->goal_gfn, iter->level);
+ tdp_iter_refresh_sptep(iter);
+
+ return true;
+}
+
+/*
+ * Steps to the next entry in the current page table, at the current page table
+ * level. The next entry could point to a page backing guest memory or another
+ * page table, or it could be non-present. Returns true if the iterator was
+ * able to step to the next entry in the page table, false if the iterator was
+ * already at the end of the current page table.
+ */
+static bool try_step_side(struct tdp_iter *iter)
+{
+ /*
+ * Check if the iterator is already at the end of the current page
+ * table.
+ */
+ if (SHADOW_PT_INDEX(iter->gfn << PAGE_SHIFT, iter->level) ==
+ (PT64_ENT_PER_PAGE - 1))
+ return false;
+
+ iter->gfn += KVM_PAGES_PER_HPAGE(iter->level);
+ iter->goal_gfn = iter->gfn;
+ iter->sptep++;
+ iter->old_spte = READ_ONCE(*iter->sptep);
+
+ return true;
+}
+
+/*
+ * Tries to traverse back up a level in the paging structure so that the walk
+ * can continue from the next entry in the parent page table. Returns true on a
+ * successful step up, false if already in the root page.
+ */
+static bool try_step_up(struct tdp_iter *iter)
+{
+ if (iter->level == iter->root_level)
+ return false;
+
+ iter->level++;
+ iter->gfn = round_gfn_for_level(iter->gfn, iter->level);
+ tdp_iter_refresh_sptep(iter);
+
+ return true;
+}
+
+/*
+ * Step to the next SPTE in a pre-order traversal of the paging structure.
+ * To get to the next SPTE, the iterator either steps down towards the goal
+ * GFN, if at a present, non-last-level SPTE, or over to a SPTE mapping a
+ * highter GFN.
+ *
+ * The basic algorithm is as follows:
+ * 1. If the current SPTE is a non-last-level SPTE, step down into the page
+ * table it points to.
+ * 2. If the iterator cannot step down, it will try to step to the next SPTE
+ * in the current page of the paging structure.
+ * 3. If the iterator cannot step to the next entry in the current page, it will
+ * try to step up to the parent paging structure page. In this case, that
+ * SPTE will have already been visited, and so the iterator must also step
+ * to the side again.
+ */
+void tdp_iter_next(struct tdp_iter *iter)
+{
+ if (try_step_down(iter))
+ return;
+
+ do {
+ if (try_step_side(iter))
+ return;
+ } while (try_step_up(iter));
+ iter->valid = false;
+}
+
+/*
+ * Restart the walk over the paging structure from the root, starting from the
+ * highest gfn the iterator had previously reached. Assumes that the entire
+ * paging structure, except the root page, may have been completely torn down
+ * and rebuilt.
+ */
+void tdp_iter_refresh_walk(struct tdp_iter *iter)
+{
+ gfn_t goal_gfn = iter->goal_gfn;
+
+ if (iter->gfn > goal_gfn)
+ goal_gfn = iter->gfn;
+
+ tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
+ iter->root_level, iter->min_level, goal_gfn);
+}
+
+u64 *tdp_iter_root_pt(struct tdp_iter *iter)
+{
+ return iter->pt_path[iter->root_level - 1];
+}
+
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
new file mode 100644
index 000000000000..47170d0dc98e
--- /dev/null
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __KVM_X86_MMU_TDP_ITER_H
+#define __KVM_X86_MMU_TDP_ITER_H
+
+#include <linux/kvm_host.h>
+
+#include "mmu.h"
+
+/*
+ * A TDP iterator performs a pre-order walk over a TDP paging structure.
+ */
+struct tdp_iter {
+ /*
+ * The iterator will traverse the paging structure towards the mapping
+ * for this GFN.
+ */
+ gfn_t goal_gfn;
+ /* Pointers to the page tables traversed to reach the current SPTE */
+ u64 *pt_path[PT64_ROOT_MAX_LEVEL];
+ /* A pointer to the current SPTE */
+ u64 *sptep;
+ /* The lowest GFN mapped by the current SPTE */
+ gfn_t gfn;
+ /* The level of the root page given to the iterator */
+ int root_level;
+ /* The lowest level the iterator should traverse to */
+ int min_level;
+ /* The iterator's current level within the paging structure */
+ int level;
+ /* A snapshot of the value at sptep */
+ u64 old_spte;
+ /*
+ * Whether the iterator has a valid state. This will be false if the
+ * iterator walks off the end of the paging structure.
+ */
+ bool valid;
+};
+
+/*
+ * Iterates over every SPTE mapping the GFN range [start, end) in a
+ * preorder traversal.
+ */
+#define for_each_tdp_pte_min_level(iter, root, root_level, min_level, start, end) \
+ for (tdp_iter_start(&iter, root, root_level, min_level, start); \
+ iter.valid && iter.gfn < end; \
+ tdp_iter_next(&iter))
+
+#define for_each_tdp_pte(iter, root, root_level, start, end) \
+ for_each_tdp_pte_min_level(iter, root, root_level, PG_LEVEL_4K, start, end)
+
+u64 *spte_to_child_pt(u64 pte, int level);
+
+void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
+ int min_level, gfn_t goal_gfn);
+void tdp_iter_next(struct tdp_iter *iter);
+void tdp_iter_refresh_walk(struct tdp_iter *iter);
+u64 *tdp_iter_root_pt(struct tdp_iter *iter);
+
+#endif /* __KVM_X86_MMU_TDP_ITER_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
new file mode 100644
index 000000000000..27e381c9da6c
--- /dev/null
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -0,0 +1,1157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "mmu.h"
+#include "mmu_internal.h"
+#include "mmutrace.h"
+#include "tdp_iter.h"
+#include "tdp_mmu.h"
+#include "spte.h"
+
+#ifdef CONFIG_X86_64
+static bool __read_mostly tdp_mmu_enabled = false;
+module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
+#endif
+
+static bool is_tdp_mmu_enabled(void)
+{
+#ifdef CONFIG_X86_64
+ return tdp_enabled && READ_ONCE(tdp_mmu_enabled);
+#else
+ return false;
+#endif /* CONFIG_X86_64 */
+}
+
+/* Initializes the TDP MMU for the VM, if enabled. */
+void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+{
+ if (!is_tdp_mmu_enabled())
+ return;
+
+ /* This should not be changed for the lifetime of the VM. */
+ kvm->arch.tdp_mmu_enabled = true;
+
+ INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
+ INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
+}
+
+void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
+{
+ if (!kvm->arch.tdp_mmu_enabled)
+ return;
+
+ WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
+}
+
+#define for_each_tdp_mmu_root(_kvm, _root) \
+ list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
+
+bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
+{
+ struct kvm_mmu_page *sp;
+
+ sp = to_shadow_page(hpa);
+
+ return sp->tdp_mmu_page && sp->root_count;
+}
+
+static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ gfn_t start, gfn_t end, bool can_yield);
+
+void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
+{
+ gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+
+ lockdep_assert_held(&kvm->mmu_lock);
+
+ WARN_ON(root->root_count);
+ WARN_ON(!root->tdp_mmu_page);
+
+ list_del(&root->link);
+
+ zap_gfn_range(kvm, root, 0, max_gfn, false);
+
+ free_page((unsigned long)root->spt);
+ kmem_cache_free(mmu_page_header_cache, root);
+}
+
+static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
+ int level)
+{
+ union kvm_mmu_page_role role;
+
+ role = vcpu->arch.mmu->mmu_role.base;
+ role.level = level;
+ role.direct = true;
+ role.gpte_is_8_bytes = true;
+ role.access = ACC_ALL;
+
+ return role;
+}
+
+static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
+ int level)
+{
+ struct kvm_mmu_page *sp;
+
+ sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
+ sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
+ set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+ sp->role.word = page_role_for_level(vcpu, level).word;
+ sp->gfn = gfn;
+ sp->tdp_mmu_page = true;
+
+ return sp;
+}
+
+static struct kvm_mmu_page *get_tdp_mmu_vcpu_root(struct kvm_vcpu *vcpu)
+{
+ union kvm_mmu_page_role role;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_page *root;
+
+ role = page_role_for_level(vcpu, vcpu->arch.mmu->shadow_root_level);
+
+ spin_lock(&kvm->mmu_lock);
+
+ /* Check for an existing root before allocating a new one. */
+ for_each_tdp_mmu_root(kvm, root) {
+ if (root->role.word == role.word) {
+ kvm_mmu_get_root(kvm, root);
+ spin_unlock(&kvm->mmu_lock);
+ return root;
+ }
+ }
+
+ root = alloc_tdp_mmu_page(vcpu, 0, vcpu->arch.mmu->shadow_root_level);
+ root->root_count = 1;
+
+ list_add(&root->link, &kvm->arch.tdp_mmu_roots);
+
+ spin_unlock(&kvm->mmu_lock);
+
+ return root;
+}
+
+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu_page *root;
+
+ root = get_tdp_mmu_vcpu_root(vcpu);
+ if (!root)
+ return INVALID_PAGE;
+
+ return __pa(root->spt);
+}
+
+static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ u64 old_spte, u64 new_spte, int level);
+
+static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
+{
+ return sp->role.smm ? 1 : 0;
+}
+
+static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
+{
+ bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
+
+ if (!is_shadow_present_pte(old_spte) || !is_last_spte(old_spte, level))
+ return;
+
+ if (is_accessed_spte(old_spte) &&
+ (!is_accessed_spte(new_spte) || pfn_changed))
+ kvm_set_pfn_accessed(spte_to_pfn(old_spte));
+}
+
+static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
+ u64 old_spte, u64 new_spte, int level)
+{
+ bool pfn_changed;
+ struct kvm_memory_slot *slot;
+
+ if (level > PG_LEVEL_4K)
+ return;
+
+ pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
+
+ if ((!is_writable_pte(old_spte) || pfn_changed) &&
+ is_writable_pte(new_spte)) {
+ slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
+ mark_page_dirty_in_slot(slot, gfn);
+ }
+}
+
+/**
+ * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * @kvm: kvm instance
+ * @as_id: the address space of the paging structure the SPTE was a part of
+ * @gfn: the base GFN that was mapped by the SPTE
+ * @old_spte: The value of the SPTE before the change
+ * @new_spte: The value of the SPTE after the change
+ * @level: the level of the PT the SPTE is part of in the paging structure
+ *
+ * Handle bookkeeping that might result from the modification of a SPTE.
+ * This function must be called for all TDP SPTE modifications.
+ */
+static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ u64 old_spte, u64 new_spte, int level)
+{
+ bool was_present = is_shadow_present_pte(old_spte);
+ bool is_present = is_shadow_present_pte(new_spte);
+ bool was_leaf = was_present && is_last_spte(old_spte, level);
+ bool is_leaf = is_present && is_last_spte(new_spte, level);
+ bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
+ u64 *pt;
+ struct kvm_mmu_page *sp;
+ u64 old_child_spte;
+ int i;
+
+ WARN_ON(level > PT64_ROOT_MAX_LEVEL);
+ WARN_ON(level < PG_LEVEL_4K);
+ WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+
+ /*
+ * If this warning were to trigger it would indicate that there was a
+ * missing MMU notifier or a race with some notifier handler.
+ * A present, leaf SPTE should never be directly replaced with another
+ * present leaf SPTE pointing to a differnt PFN. A notifier handler
+ * should be zapping the SPTE before the main MM's page table is
+ * changed, or the SPTE should be zeroed, and the TLBs flushed by the
+ * thread before replacement.
+ */
+ if (was_leaf && is_leaf && pfn_changed) {
+ pr_err("Invalid SPTE change: cannot replace a present leaf\n"
+ "SPTE with another present leaf SPTE mapping a\n"
+ "different PFN!\n"
+ "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
+ as_id, gfn, old_spte, new_spte, level);
+
+ /*
+ * Crash the host to prevent error propagation and guest data
+ * courruption.
+ */
+ BUG();
+ }
+
+ if (old_spte == new_spte)
+ return;
+
+ /*
+ * The only times a SPTE should be changed from a non-present to
+ * non-present state is when an MMIO entry is installed/modified/
+ * removed. In that case, there is nothing to do here.
+ */
+ if (!was_present && !is_present) {
+ /*
+ * If this change does not involve a MMIO SPTE, it is
+ * unexpected. Log the change, though it should not impact the
+ * guest since both the former and current SPTEs are nonpresent.
+ */
+ if (WARN_ON(!is_mmio_spte(old_spte) && !is_mmio_spte(new_spte)))
+ pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
+ "should not be replaced with another,\n"
+ "different nonpresent SPTE, unless one or both\n"
+ "are MMIO SPTEs.\n"
+ "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d",
+ as_id, gfn, old_spte, new_spte, level);
+ return;
+ }
+
+
+ if (was_leaf && is_dirty_spte(old_spte) &&
+ (!is_dirty_spte(new_spte) || pfn_changed))
+ kvm_set_pfn_dirty(spte_to_pfn(old_spte));
+
+ /*
+ * Recursively handle child PTs if the change removed a subtree from
+ * the paging structure.
+ */
+ if (was_present && !was_leaf && (pfn_changed || !is_present)) {
+ pt = spte_to_child_pt(old_spte, level);
+ sp = sptep_to_sp(pt);
+
+ list_del(&sp->link);
+
+ if (sp->lpage_disallowed)
+ unaccount_huge_nx_page(kvm, sp);
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+ old_child_spte = READ_ONCE(*(pt + i));
+ WRITE_ONCE(*(pt + i), 0);
+ handle_changed_spte(kvm, as_id,
+ gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
+ old_child_spte, 0, level - 1);
+ }
+
+ kvm_flush_remote_tlbs_with_address(kvm, gfn,
+ KVM_PAGES_PER_HPAGE(level));
+
+ free_page((unsigned long)pt);
+ kmem_cache_free(mmu_page_header_cache, sp);
+ }
+}
+
+static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
+ u64 old_spte, u64 new_spte, int level)
+{
+ __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
+ handle_changed_spte_acc_track(old_spte, new_spte, level);
+ handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
+ new_spte, level);
+}
+
+static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ u64 new_spte, bool record_acc_track,
+ bool record_dirty_log)
+{
+ u64 *root_pt = tdp_iter_root_pt(iter);
+ struct kvm_mmu_page *root = sptep_to_sp(root_pt);
+ int as_id = kvm_mmu_page_as_id(root);
+
+ WRITE_ONCE(*iter->sptep, new_spte);
+
+ __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
+ iter->level);
+ if (record_acc_track)
+ handle_changed_spte_acc_track(iter->old_spte, new_spte,
+ iter->level);
+ if (record_dirty_log)
+ handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
+ iter->old_spte, new_spte,
+ iter->level);
+}
+
+static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
+ u64 new_spte)
+{
+ __tdp_mmu_set_spte(kvm, iter, new_spte, true, true);
+}
+
+static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
+ struct tdp_iter *iter,
+ u64 new_spte)
+{
+ __tdp_mmu_set_spte(kvm, iter, new_spte, false, true);
+}
+
+static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
+ struct tdp_iter *iter,
+ u64 new_spte)
+{
+ __tdp_mmu_set_spte(kvm, iter, new_spte, true, false);
+}
+
+#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
+ for_each_tdp_pte(_iter, _root->spt, _root->role.level, _start, _end)
+
+#define tdp_root_for_each_leaf_pte(_iter, _root, _start, _end) \
+ tdp_root_for_each_pte(_iter, _root, _start, _end) \
+ if (!is_shadow_present_pte(_iter.old_spte) || \
+ !is_last_spte(_iter.old_spte, _iter.level)) \
+ continue; \
+ else
+
+#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \
+ for_each_tdp_pte(_iter, __va(_mmu->root_hpa), \
+ _mmu->shadow_root_level, _start, _end)
+
+/*
+ * Flush the TLB if the process should drop kvm->mmu_lock.
+ * Return whether the caller still needs to flush the tlb.
+ */
+static bool tdp_mmu_iter_flush_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
+{
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ kvm_flush_remote_tlbs(kvm);
+ cond_resched_lock(&kvm->mmu_lock);
+ tdp_iter_refresh_walk(iter);
+ return false;
+ } else {
+ return true;
+ }
+}
+
+static void tdp_mmu_iter_cond_resched(struct kvm *kvm, struct tdp_iter *iter)
+{
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ cond_resched_lock(&kvm->mmu_lock);
+ tdp_iter_refresh_walk(iter);
+ }
+}
+
+/*
+ * Tears down the mappings for the range of gfns, [start, end), and frees the
+ * non-root pages mapping GFNs strictly within that range. Returns true if
+ * SPTEs have been cleared and a TLB flush is needed before releasing the
+ * MMU lock.
+ * If can_yield is true, will release the MMU lock and reschedule if the
+ * scheduler needs the CPU or there is contention on the MMU lock. If this
+ * function cannot yield, it will not release the MMU lock or reschedule and
+ * the caller must ensure it does not supply too large a GFN range, or the
+ * operation can cause a soft lockup.
+ */
+static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ gfn_t start, gfn_t end, bool can_yield)
+{
+ struct tdp_iter iter;
+ bool flush_needed = false;
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+ if (!is_shadow_present_pte(iter.old_spte))
+ continue;
+
+ /*
+ * If this is a non-last-level SPTE that covers a larger range
+ * than should be zapped, continue, and zap the mappings at a
+ * lower level.
+ */
+ if ((iter.gfn < start ||
+ iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
+ !is_last_spte(iter.old_spte, iter.level))
+ continue;
+
+ tdp_mmu_set_spte(kvm, &iter, 0);
+
+ if (can_yield)
+ flush_needed = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
+ else
+ flush_needed = true;
+ }
+ return flush_needed;
+}
+
+/*
+ * Tears down the mappings for the range of gfns, [start, end), and frees the
+ * non-root pages mapping GFNs strictly within that range. Returns true if
+ * SPTEs have been cleared and a TLB flush is needed before releasing the
+ * MMU lock.
+ */
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+{
+ struct kvm_mmu_page *root;
+ bool flush = false;
+
+ for_each_tdp_mmu_root(kvm, root) {
+ /*
+ * Take a reference on the root so that it cannot be freed if
+ * this thread releases the MMU lock and yields in this loop.
+ */
+ kvm_mmu_get_root(kvm, root);
+
+ flush |= zap_gfn_range(kvm, root, start, end, true);
+
+ kvm_mmu_put_root(kvm, root);
+ }
+
+ return flush;
+}
+
+void kvm_tdp_mmu_zap_all(struct kvm *kvm)
+{
+ gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+ bool flush;
+
+ flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+}
+
+/*
+ * Installs a last-level SPTE to handle a TDP page fault.
+ * (NPT/EPT violation/misconfiguration)
+ */
+static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
+ int map_writable,
+ struct tdp_iter *iter,
+ kvm_pfn_t pfn, bool prefault)
+{
+ u64 new_spte;
+ int ret = 0;
+ int make_spte_ret = 0;
+
+ if (unlikely(is_noslot_pfn(pfn))) {
+ new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
+ trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
+ } else
+ make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
+ pfn, iter->old_spte, prefault, true,
+ map_writable, !shadow_accessed_mask,
+ &new_spte);
+
+ if (new_spte == iter->old_spte)
+ ret = RET_PF_SPURIOUS;
+ else
+ tdp_mmu_set_spte(vcpu->kvm, iter, new_spte);
+
+ /*
+ * If the page fault was caused by a write but the page is write
+ * protected, emulation is needed. If the emulation was skipped,
+ * the vCPU would have the same fault again.
+ */
+ if (make_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
+ if (write)
+ ret = RET_PF_EMULATE;
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+
+ /* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
+ if (unlikely(is_mmio_spte(new_spte)))
+ ret = RET_PF_EMULATE;
+
+ trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
+ if (!prefault)
+ vcpu->stat.pf_fixed++;
+
+ return ret;
+}
+
+/*
+ * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
+ * page tables and SPTEs to translate the faulting guest physical address.
+ */
+int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ int map_writable, int max_level, kvm_pfn_t pfn,
+ bool prefault)
+{
+ bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
+ bool write = error_code & PFERR_WRITE_MASK;
+ bool exec = error_code & PFERR_FETCH_MASK;
+ bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
+ struct tdp_iter iter;
+ struct kvm_mmu_page *sp;
+ u64 *child_pt;
+ u64 new_spte;
+ int ret;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ int level;
+ int req_level;
+
+ if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
+ return RET_PF_RETRY;
+ if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
+ return RET_PF_RETRY;
+
+ level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
+ huge_page_disallowed, &req_level);
+
+ trace_kvm_mmu_spte_requested(gpa, level, pfn);
+ tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+ if (nx_huge_page_workaround_enabled)
+ disallowed_hugepage_adjust(iter.old_spte, gfn,
+ iter.level, &pfn, &level);
+
+ if (iter.level == level)
+ break;
+
+ /*
+ * If there is an SPTE mapping a large page at a higher level
+ * than the target, that SPTE must be cleared and replaced
+ * with a non-leaf SPTE.
+ */
+ if (is_shadow_present_pte(iter.old_spte) &&
+ is_large_pte(iter.old_spte)) {
+ tdp_mmu_set_spte(vcpu->kvm, &iter, 0);
+
+ kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn,
+ KVM_PAGES_PER_HPAGE(iter.level));
+
+ /*
+ * The iter must explicitly re-read the spte here
+ * because the new value informs the !present
+ * path below.
+ */
+ iter.old_spte = READ_ONCE(*iter.sptep);
+ }
+
+ if (!is_shadow_present_pte(iter.old_spte)) {
+ sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
+ list_add(&sp->link, &vcpu->kvm->arch.tdp_mmu_pages);
+ child_pt = sp->spt;
+ clear_page(child_pt);
+ new_spte = make_nonleaf_spte(child_pt,
+ !shadow_accessed_mask);
+
+ trace_kvm_mmu_get_page(sp, true);
+ if (huge_page_disallowed && req_level >= iter.level)
+ account_huge_nx_page(vcpu->kvm, sp);
+
+ tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte);
+ }
+ }
+
+ if (WARN_ON(iter.level != level))
+ return RET_PF_RETRY;
+
+ ret = tdp_mmu_map_handle_target_level(vcpu, write, map_writable, &iter,
+ pfn, prefault);
+
+ return ret;
+}
+
+static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
+ unsigned long end, unsigned long data,
+ int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_mmu_page *root, gfn_t start,
+ gfn_t end, unsigned long data))
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ struct kvm_mmu_page *root;
+ int ret = 0;
+ int as_id;
+
+ for_each_tdp_mmu_root(kvm, root) {
+ /*
+ * Take a reference on the root so that it cannot be freed if
+ * this thread releases the MMU lock and yields in this loop.
+ */
+ kvm_mmu_get_root(kvm, root);
+
+ as_id = kvm_mmu_page_as_id(root);
+ slots = __kvm_memslots(kvm, as_id);
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long hva_start, hva_end;
+ gfn_t gfn_start, gfn_end;
+
+ hva_start = max(start, memslot->userspace_addr);
+ hva_end = min(end, memslot->userspace_addr +
+ (memslot->npages << PAGE_SHIFT));
+ if (hva_start >= hva_end)
+ continue;
+ /*
+ * {gfn(page) | page intersects with [hva_start, hva_end)} =
+ * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+ */
+ gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+ gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+ ret |= handler(kvm, memslot, root, gfn_start,
+ gfn_end, data);
+ }
+
+ kvm_mmu_put_root(kvm, root);
+ }
+
+ return ret;
+}
+
+static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ struct kvm_mmu_page *root, gfn_t start,
+ gfn_t end, unsigned long unused)
+{
+ return zap_gfn_range(kvm, root, start, end, false);
+}
+
+int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
+ unsigned long end)
+{
+ return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
+ zap_gfn_range_hva_wrapper);
+}
+
+/*
+ * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
+ * if any of the GFNs in the range have been accessed.
+ */
+static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_mmu_page *root, gfn_t start, gfn_t end,
+ unsigned long unused)
+{
+ struct tdp_iter iter;
+ int young = 0;
+ u64 new_spte = 0;
+
+ tdp_root_for_each_leaf_pte(iter, root, start, end) {
+ /*
+ * If we have a non-accessed entry we don't need to change the
+ * pte.
+ */
+ if (!is_accessed_spte(iter.old_spte))
+ continue;
+
+ new_spte = iter.old_spte;
+
+ if (spte_ad_enabled(new_spte)) {
+ clear_bit((ffs(shadow_accessed_mask) - 1),
+ (unsigned long *)&new_spte);
+ } else {
+ /*
+ * Capture the dirty status of the page, so that it doesn't get
+ * lost when the SPTE is marked for access tracking.
+ */
+ if (is_writable_pte(new_spte))
+ kvm_set_pfn_dirty(spte_to_pfn(new_spte));
+
+ new_spte = mark_spte_for_access_track(new_spte);
+ }
+ new_spte &= ~shadow_dirty_mask;
+
+ tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
+ young = 1;
+ }
+
+ return young;
+}
+
+int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
+ unsigned long end)
+{
+ return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
+ age_gfn_range);
+}
+
+static int test_age_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
+ unsigned long unused2)
+{
+ struct tdp_iter iter;
+
+ tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1)
+ if (is_accessed_spte(iter.old_spte))
+ return 1;
+
+ return 0;
+}
+
+int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return kvm_tdp_mmu_handle_hva_range(kvm, hva, hva + 1, 0,
+ test_age_gfn);
+}
+
+/*
+ * Handle the changed_pte MMU notifier for the TDP MMU.
+ * data is a pointer to the new pte_t mapping the HVA specified by the MMU
+ * notifier.
+ * Returns non-zero if a flush is needed before releasing the MMU lock.
+ */
+static int set_tdp_spte(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_mmu_page *root, gfn_t gfn, gfn_t unused,
+ unsigned long data)
+{
+ struct tdp_iter iter;
+ pte_t *ptep = (pte_t *)data;
+ kvm_pfn_t new_pfn;
+ u64 new_spte;
+ int need_flush = 0;
+
+ WARN_ON(pte_huge(*ptep));
+
+ new_pfn = pte_pfn(*ptep);
+
+ tdp_root_for_each_pte(iter, root, gfn, gfn + 1) {
+ if (iter.level != PG_LEVEL_4K)
+ continue;
+
+ if (!is_shadow_present_pte(iter.old_spte))
+ break;
+
+ tdp_mmu_set_spte(kvm, &iter, 0);
+
+ kvm_flush_remote_tlbs_with_address(kvm, iter.gfn, 1);
+
+ if (!pte_write(*ptep)) {
+ new_spte = kvm_mmu_changed_pte_notifier_make_spte(
+ iter.old_spte, new_pfn);
+
+ tdp_mmu_set_spte(kvm, &iter, new_spte);
+ }
+
+ need_flush = 1;
+ }
+
+ if (need_flush)
+ kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
+
+ return 0;
+}
+
+int kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address,
+ pte_t *host_ptep)
+{
+ return kvm_tdp_mmu_handle_hva_range(kvm, address, address + 1,
+ (unsigned long)host_ptep,
+ set_tdp_spte);
+}
+
+/*
+ * Remove write access from all the SPTEs mapping GFNs [start, end). If
+ * skip_4k is set, SPTEs that map 4k pages, will not be write-protected.
+ * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ */
+static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ gfn_t start, gfn_t end, int min_level)
+{
+ struct tdp_iter iter;
+ u64 new_spte;
+ bool spte_set = false;
+
+ BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
+
+ for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+ min_level, start, end) {
+ if (!is_shadow_present_pte(iter.old_spte) ||
+ !is_last_spte(iter.old_spte, iter.level))
+ continue;
+
+ new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
+
+ tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ spte_set = true;
+
+ tdp_mmu_iter_cond_resched(kvm, &iter);
+ }
+ return spte_set;
+}
+
+/*
+ * Remove write access from all the SPTEs mapping GFNs in the memslot. Will
+ * only affect leaf SPTEs down to min_level.
+ * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ */
+bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ int min_level)
+{
+ struct kvm_mmu_page *root;
+ int root_as_id;
+ bool spte_set = false;
+
+ for_each_tdp_mmu_root(kvm, root) {
+ root_as_id = kvm_mmu_page_as_id(root);
+ if (root_as_id != slot->as_id)
+ continue;
+
+ /*
+ * Take a reference on the root so that it cannot be freed if
+ * this thread releases the MMU lock and yields in this loop.
+ */
+ kvm_mmu_get_root(kvm, root);
+
+ spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
+ slot->base_gfn + slot->npages, min_level);
+
+ kvm_mmu_put_root(kvm, root);
+ }
+
+ return spte_set;
+}
+
+/*
+ * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
+ * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
+ * If AD bits are not enabled, this will require clearing the writable bit on
+ * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
+ * be flushed.
+ */
+static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ gfn_t start, gfn_t end)
+{
+ struct tdp_iter iter;
+ u64 new_spte;
+ bool spte_set = false;
+
+ tdp_root_for_each_leaf_pte(iter, root, start, end) {
+ if (spte_ad_need_write_protect(iter.old_spte)) {
+ if (is_writable_pte(iter.old_spte))
+ new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
+ else
+ continue;
+ } else {
+ if (iter.old_spte & shadow_dirty_mask)
+ new_spte = iter.old_spte & ~shadow_dirty_mask;
+ else
+ continue;
+ }
+
+ tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+ spte_set = true;
+
+ tdp_mmu_iter_cond_resched(kvm, &iter);
+ }
+ return spte_set;
+}
+
+/*
+ * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
+ * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
+ * If AD bits are not enabled, this will require clearing the writable bit on
+ * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
+ * be flushed.
+ */
+bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+ struct kvm_mmu_page *root;
+ int root_as_id;
+ bool spte_set = false;
+
+ for_each_tdp_mmu_root(kvm, root) {
+ root_as_id = kvm_mmu_page_as_id(root);
+ if (root_as_id != slot->as_id)
+ continue;
+
+ /*
+ * Take a reference on the root so that it cannot be freed if
+ * this thread releases the MMU lock and yields in this loop.
+ */
+ kvm_mmu_get_root(kvm, root);
+
+ spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
+ slot->base_gfn + slot->npages);
+
+ kvm_mmu_put_root(kvm, root);
+ }
+
+ return spte_set;
+}
+
+/*
+ * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
+ * set in mask, starting at gfn. The given memslot is expected to contain all
+ * the GFNs represented by set bits in the mask. If AD bits are enabled,
+ * clearing the dirty status will involve clearing the dirty bit on each SPTE
+ * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
+ */
+static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
+ gfn_t gfn, unsigned long mask, bool wrprot)
+{
+ struct tdp_iter iter;
+ u64 new_spte;
+
+ tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
+ gfn + BITS_PER_LONG) {
+ if (!mask)
+ break;
+
+ if (iter.level > PG_LEVEL_4K ||
+ !(mask & (1UL << (iter.gfn - gfn))))
+ continue;
+
+ if (wrprot || spte_ad_need_write_protect(iter.old_spte)) {
+ if (is_writable_pte(iter.old_spte))
+ new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
+ else
+ continue;
+ } else {
+ if (iter.old_spte & shadow_dirty_mask)
+ new_spte = iter.old_spte & ~shadow_dirty_mask;
+ else
+ continue;
+ }
+
+ tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+
+ mask &= ~(1UL << (iter.gfn - gfn));
+ }
+}
+
+/*
+ * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
+ * set in mask, starting at gfn. The given memslot is expected to contain all
+ * the GFNs represented by set bits in the mask. If AD bits are enabled,
+ * clearing the dirty status will involve clearing the dirty bit on each SPTE
+ * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
+ */
+void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn, unsigned long mask,
+ bool wrprot)
+{
+ struct kvm_mmu_page *root;
+ int root_as_id;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_tdp_mmu_root(kvm, root) {
+ root_as_id = kvm_mmu_page_as_id(root);
+ if (root_as_id != slot->as_id)
+ continue;
+
+ clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
+ }
+}
+
+/*
+ * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
+ * only used for PML, and so will involve setting the dirty bit on each SPTE.
+ * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ */
+static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
+ gfn_t start, gfn_t end)
+{
+ struct tdp_iter iter;
+ u64 new_spte;
+ bool spte_set = false;
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+ if (!is_shadow_present_pte(iter.old_spte))
+ continue;
+
+ new_spte = iter.old_spte | shadow_dirty_mask;
+
+ tdp_mmu_set_spte(kvm, &iter, new_spte);
+ spte_set = true;
+
+ tdp_mmu_iter_cond_resched(kvm, &iter);
+ }
+
+ return spte_set;
+}
+
+/*
+ * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
+ * only used for PML, and so will involve setting the dirty bit on each SPTE.
+ * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ */
+bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+ struct kvm_mmu_page *root;
+ int root_as_id;
+ bool spte_set = false;
+
+ for_each_tdp_mmu_root(kvm, root) {
+ root_as_id = kvm_mmu_page_as_id(root);
+ if (root_as_id != slot->as_id)
+ continue;
+
+ /*
+ * Take a reference on the root so that it cannot be freed if
+ * this thread releases the MMU lock and yields in this loop.
+ */
+ kvm_mmu_get_root(kvm, root);
+
+ spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
+ slot->base_gfn + slot->npages);
+
+ kvm_mmu_put_root(kvm, root);
+ }
+ return spte_set;
+}
+
+/*
+ * Clear non-leaf entries (and free associated page tables) which could
+ * be replaced by large mappings, for GFNs within the slot.
+ */
+static void zap_collapsible_spte_range(struct kvm *kvm,
+ struct kvm_mmu_page *root,
+ gfn_t start, gfn_t end)
+{
+ struct tdp_iter iter;
+ kvm_pfn_t pfn;
+ bool spte_set = false;
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+ if (!is_shadow_present_pte(iter.old_spte) ||
+ is_last_spte(iter.old_spte, iter.level))
+ continue;
+
+ pfn = spte_to_pfn(iter.old_spte);
+ if (kvm_is_reserved_pfn(pfn) ||
+ !PageTransCompoundMap(pfn_to_page(pfn)))
+ continue;
+
+ tdp_mmu_set_spte(kvm, &iter, 0);
+
+ spte_set = tdp_mmu_iter_flush_cond_resched(kvm, &iter);
+ }
+
+ if (spte_set)
+ kvm_flush_remote_tlbs(kvm);
+}
+
+/*
+ * Clear non-leaf entries (and free associated page tables) which could
+ * be replaced by large mappings, for GFNs within the slot.
+ */
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ const struct kvm_memory_slot *slot)
+{
+ struct kvm_mmu_page *root;
+ int root_as_id;
+
+ for_each_tdp_mmu_root(kvm, root) {
+ root_as_id = kvm_mmu_page_as_id(root);
+ if (root_as_id != slot->as_id)
+ continue;
+
+ /*
+ * Take a reference on the root so that it cannot be freed if
+ * this thread releases the MMU lock and yields in this loop.
+ */
+ kvm_mmu_get_root(kvm, root);
+
+ zap_collapsible_spte_range(kvm, root, slot->base_gfn,
+ slot->base_gfn + slot->npages);
+
+ kvm_mmu_put_root(kvm, root);
+ }
+}
+
+/*
+ * Removes write access on the last level SPTE mapping this GFN and unsets the
+ * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted.
+ * Returns true if an SPTE was set and a TLB flush is needed.
+ */
+static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
+ gfn_t gfn)
+{
+ struct tdp_iter iter;
+ u64 new_spte;
+ bool spte_set = false;
+
+ tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
+ if (!is_writable_pte(iter.old_spte))
+ break;
+
+ new_spte = iter.old_spte &
+ ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
+
+ tdp_mmu_set_spte(kvm, &iter, new_spte);
+ spte_set = true;
+ }
+
+ return spte_set;
+}
+
+/*
+ * Removes write access on the last level SPTE mapping this GFN and unsets the
+ * SPTE_MMU_WRITABLE bit to ensure future writes continue to be intercepted.
+ * Returns true if an SPTE was set and a TLB flush is needed.
+ */
+bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ struct kvm_mmu_page *root;
+ int root_as_id;
+ bool spte_set = false;
+
+ lockdep_assert_held(&kvm->mmu_lock);
+ for_each_tdp_mmu_root(kvm, root) {
+ root_as_id = kvm_mmu_page_as_id(root);
+ if (root_as_id != slot->as_id)
+ continue;
+
+ spte_set |= write_protect_gfn(kvm, root, gfn);
+ }
+ return spte_set;
+}
+
+/*
+ * Return the level of the lowest level SPTE added to sptes.
+ * That SPTE may be non-present.
+ */
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+{
+ struct tdp_iter iter;
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
+ int leaf = vcpu->arch.mmu->shadow_root_level;
+ gfn_t gfn = addr >> PAGE_SHIFT;
+
+ tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
+ leaf = iter.level;
+ sptes[leaf - 1] = iter.old_spte;
+ }
+
+ return leaf;
+}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
new file mode 100644
index 000000000000..556e065503f6
--- /dev/null
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __KVM_X86_MMU_TDP_MMU_H
+#define __KVM_X86_MMU_TDP_MMU_H
+
+#include <linux/kvm_host.h>
+
+void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
+
+bool is_tdp_mmu_root(struct kvm *kvm, hpa_t root);
+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
+void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
+
+bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+void kvm_tdp_mmu_zap_all(struct kvm *kvm);
+
+int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+ int map_writable, int max_level, kvm_pfn_t pfn,
+ bool prefault);
+
+int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+
+int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
+ unsigned long end);
+int kvm_tdp_mmu_test_age_hva(struct kvm *kvm, unsigned long hva);
+
+int kvm_tdp_mmu_set_spte_hva(struct kvm *kvm, unsigned long address,
+ pte_t *host_ptep);
+
+bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ int min_level);
+bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
+ struct kvm_memory_slot *slot);
+void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn, unsigned long mask,
+ bool wrprot);
+bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot);
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ const struct kvm_memory_slot *slot);
+
+bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn);
+
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+#endif /* __KVM_X86_MMU_TDP_MMU_H */
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ac830cd50830..8c550999ace0 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -153,20 +153,18 @@ int avic_vm_init(struct kvm *kvm)
return 0;
/* Allocating physical APIC ID table (4KB) */
- p_page = alloc_page(GFP_KERNEL_ACCOUNT);
+ p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!p_page)
goto free_avic;
kvm_svm->avic_physical_id_table_page = p_page;
- clear_page(page_address(p_page));
/* Allocating logical APIC ID table (4KB) */
- l_page = alloc_page(GFP_KERNEL_ACCOUNT);
+ l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!l_page)
goto free_avic;
kvm_svm->avic_logical_id_table_page = l_page;
- clear_page(page_address(l_page));
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
again:
@@ -868,6 +866,7 @@ int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* - Tell IOMMU to use legacy mode for this interrupt.
* - Retrieve ga_tag of prior interrupt remapping data.
*/
+ pi.prev_ga_tag = 0;
pi.is_guest_mode = false;
ret = irq_set_vcpu_affinity(host_irq, &pi);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e90bc436f584..9e4c226dbf7d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -98,6 +98,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h, *g;
+ unsigned int i;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -108,42 +109,37 @@ void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested.ctl;
- svm->nested.host_intercept_exceptions = h->intercept_exceptions;
-
- c->intercept_cr = h->intercept_cr;
- c->intercept_dr = h->intercept_dr;
- c->intercept_exceptions = h->intercept_exceptions;
- c->intercept = h->intercept;
+ for (i = 0; i < MAX_INTERCEPT; i++)
+ c->intercepts[i] = h->intercepts[i];
if (g->int_ctl & V_INTR_MASKING_MASK) {
/* We only want the cr8 intercept bits of L1 */
- c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
- c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);
+ vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
+ vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
/*
* Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
* affect any interrupt we may want to inject; therefore,
* interrupt window vmexits are irrelevant to L0.
*/
- c->intercept &= ~(1ULL << INTERCEPT_VINTR);
+ vmcb_clr_intercept(c, INTERCEPT_VINTR);
}
/* We don't want to see VMMCALLs from a nested guest */
- c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);
+ vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
- c->intercept_cr |= g->intercept_cr;
- c->intercept_dr |= g->intercept_dr;
- c->intercept_exceptions |= g->intercept_exceptions;
- c->intercept |= g->intercept;
+ for (i = 0; i < MAX_INTERCEPT; i++)
+ c->intercepts[i] |= g->intercepts[i];
}
static void copy_vmcb_control_area(struct vmcb_control_area *dst,
struct vmcb_control_area *from)
{
- dst->intercept_cr = from->intercept_cr;
- dst->intercept_dr = from->intercept_dr;
- dst->intercept_exceptions = from->intercept_exceptions;
- dst->intercept = from->intercept;
+ unsigned int i;
+
+ for (i = 0; i < MAX_INTERCEPT; i++)
+ dst->intercepts[i] = from->intercepts[i];
+
dst->iopm_base_pa = from->iopm_base_pa;
dst->msrpm_base_pa = from->msrpm_base_pa;
dst->tsc_offset = from->tsc_offset;
@@ -176,7 +172,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
*/
int i;
- if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
return true;
for (i = 0; i < MSRPM_OFFSETS; i++) {
@@ -200,9 +196,23 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
return true;
}
+static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ if (!nested_svm_vmrun_msrpm(svm)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return false;
+ }
+
+ return true;
+}
+
static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
{
- if ((control->intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
+ if ((vmcb_is_intercept(control, INTERCEPT_VMRUN)) == 0)
return false;
if (control->asid == 0)
@@ -215,41 +225,39 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
return true;
}
-static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb)
+static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
- bool nested_vmcb_lma;
- if ((vmcb->save.efer & EFER_SVME) == 0)
+ bool vmcb12_lma;
+
+ if ((vmcb12->save.efer & EFER_SVME) == 0)
return false;
- if (((vmcb->save.cr0 & X86_CR0_CD) == 0) &&
- (vmcb->save.cr0 & X86_CR0_NW))
+ if (((vmcb12->save.cr0 & X86_CR0_CD) == 0) && (vmcb12->save.cr0 & X86_CR0_NW))
return false;
- if (!kvm_dr6_valid(vmcb->save.dr6) || !kvm_dr7_valid(vmcb->save.dr7))
+ if (!kvm_dr6_valid(vmcb12->save.dr6) || !kvm_dr7_valid(vmcb12->save.dr7))
return false;
- nested_vmcb_lma =
- (vmcb->save.efer & EFER_LME) &&
- (vmcb->save.cr0 & X86_CR0_PG);
+ vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG);
- if (!nested_vmcb_lma) {
- if (vmcb->save.cr4 & X86_CR4_PAE) {
- if (vmcb->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
+ if (!vmcb12_lma) {
+ if (vmcb12->save.cr4 & X86_CR4_PAE) {
+ if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
return false;
} else {
- if (vmcb->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
+ if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
return false;
}
} else {
- if (!(vmcb->save.cr4 & X86_CR4_PAE) ||
- !(vmcb->save.cr0 & X86_CR0_PE) ||
- (vmcb->save.cr3 & MSR_CR3_LONG_RESERVED_MASK))
+ if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
+ !(vmcb12->save.cr0 & X86_CR0_PE) ||
+ (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
return false;
}
- if (kvm_valid_cr4(&svm->vcpu, vmcb->save.cr4))
+ if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
return false;
- return nested_vmcb_check_controls(&vmcb->control);
+ return nested_vmcb_check_controls(&vmcb12->control);
}
static void load_nested_vmcb_control(struct vcpu_svm *svm,
@@ -296,7 +304,7 @@ void sync_nested_vmcb_control(struct vcpu_svm *svm)
* EXIT_INT_INFO.
*/
static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
- struct vmcb *nested_vmcb)
+ struct vmcb *vmcb12)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
u32 exit_int_info = 0;
@@ -308,7 +316,7 @@ static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
if (vcpu->arch.exception.has_error_code) {
exit_int_info |= SVM_EVTINJ_VALID_ERR;
- nested_vmcb->control.exit_int_info_err =
+ vmcb12->control.exit_int_info_err =
vcpu->arch.exception.error_code;
}
@@ -325,7 +333,7 @@ static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
exit_int_info |= SVM_EVTINJ_TYPE_INTR;
}
- nested_vmcb->control.exit_int_info = exit_int_info;
+ vmcb12->control.exit_int_info = exit_int_info;
}
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
@@ -364,31 +372,31 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
return 0;
}
-static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb)
+static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
/* Load the nested guest state */
- svm->vmcb->save.es = nested_vmcb->save.es;
- svm->vmcb->save.cs = nested_vmcb->save.cs;
- svm->vmcb->save.ss = nested_vmcb->save.ss;
- svm->vmcb->save.ds = nested_vmcb->save.ds;
- svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
- svm->vmcb->save.idtr = nested_vmcb->save.idtr;
- kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
- svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
- svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
- svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
- svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
- kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
- kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
- kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
+ svm->vmcb->save.es = vmcb12->save.es;
+ svm->vmcb->save.cs = vmcb12->save.cs;
+ svm->vmcb->save.ss = vmcb12->save.ss;
+ svm->vmcb->save.ds = vmcb12->save.ds;
+ svm->vmcb->save.gdtr = vmcb12->save.gdtr;
+ svm->vmcb->save.idtr = vmcb12->save.idtr;
+ kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags);
+ svm_set_efer(&svm->vcpu, vmcb12->save.efer);
+ svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
+ svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
+ svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = vmcb12->save.cr2;
+ kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
+ kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
+ kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
- svm->vmcb->save.rax = nested_vmcb->save.rax;
- svm->vmcb->save.rsp = nested_vmcb->save.rsp;
- svm->vmcb->save.rip = nested_vmcb->save.rip;
- svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
- svm->vcpu.arch.dr6 = nested_vmcb->save.dr6;
- svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+ svm->vmcb->save.rax = vmcb12->save.rax;
+ svm->vmcb->save.rsp = vmcb12->save.rsp;
+ svm->vmcb->save.rip = vmcb12->save.rip;
+ svm->vmcb->save.dr7 = vmcb12->save.dr7;
+ svm->vcpu.arch.dr6 = vmcb12->save.dr6;
+ svm->vmcb->save.cpl = vmcb12->save.cpl;
}
static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
@@ -426,17 +434,17 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
vmcb_mark_all_dirty(svm->vmcb);
}
-int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb)
+int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
+ struct vmcb *vmcb12)
{
int ret;
- svm->nested.vmcb = vmcb_gpa;
- load_nested_vmcb_control(svm, &nested_vmcb->control);
- nested_prepare_vmcb_save(svm, nested_vmcb);
+ svm->nested.vmcb12_gpa = vmcb12_gpa;
+ load_nested_vmcb_control(svm, &vmcb12->control);
+ nested_prepare_vmcb_save(svm, vmcb12);
nested_prepare_vmcb_control(svm);
- ret = nested_svm_load_cr3(&svm->vcpu, nested_vmcb->save.cr3,
+ ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
nested_npt_enabled(svm));
if (ret)
return ret;
@@ -449,19 +457,19 @@ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
int nested_svm_vmrun(struct vcpu_svm *svm)
{
int ret;
- struct vmcb *nested_vmcb;
+ struct vmcb *vmcb12;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
- u64 vmcb_gpa;
+ u64 vmcb12_gpa;
if (is_smm(&svm->vcpu)) {
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
- vmcb_gpa = svm->vmcb->save.rax;
- ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
+ vmcb12_gpa = svm->vmcb->save.rax;
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb12_gpa), &map);
if (ret == -EINVAL) {
kvm_inject_gp(&svm->vcpu, 0);
return 1;
@@ -471,26 +479,31 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
ret = kvm_skip_emulated_instruction(&svm->vcpu);
- nested_vmcb = map.hva;
+ vmcb12 = map.hva;
+
+ if (WARN_ON_ONCE(!svm->nested.initialized))
+ return -EINVAL;
- if (!nested_vmcb_checks(svm, nested_vmcb)) {
- nested_vmcb->control.exit_code = SVM_EXIT_ERR;
- nested_vmcb->control.exit_code_hi = 0;
- nested_vmcb->control.exit_info_1 = 0;
- nested_vmcb->control.exit_info_2 = 0;
+ if (!nested_vmcb_checks(svm, vmcb12)) {
+ vmcb12->control.exit_code = SVM_EXIT_ERR;
+ vmcb12->control.exit_code_hi = 0;
+ vmcb12->control.exit_info_1 = 0;
+ vmcb12->control.exit_info_2 = 0;
goto out;
}
- trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
- nested_vmcb->save.rip,
- nested_vmcb->control.int_ctl,
- nested_vmcb->control.event_inj,
- nested_vmcb->control.nested_ctl);
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
+ vmcb12->save.rip,
+ vmcb12->control.int_ctl,
+ vmcb12->control.event_inj,
+ vmcb12->control.nested_ctl);
- trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
- nested_vmcb->control.intercept_cr >> 16,
- nested_vmcb->control.intercept_exceptions,
- nested_vmcb->control.intercept);
+ trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
+ vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
+ vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
+ vmcb12->control.intercepts[INTERCEPT_WORD3],
+ vmcb12->control.intercepts[INTERCEPT_WORD4],
+ vmcb12->control.intercepts[INTERCEPT_WORD5]);
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
@@ -522,7 +535,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
svm->nested.nested_run_pending = 1;
- if (enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb))
+ if (enter_svm_guest_mode(svm, vmcb12_gpa, vmcb12))
goto out_exit_err;
if (nested_svm_vmrun_msrpm(svm))
@@ -563,23 +576,23 @@ void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
int nested_svm_vmexit(struct vcpu_svm *svm)
{
int rc;
- struct vmcb *nested_vmcb;
+ struct vmcb *vmcb12;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
struct kvm_host_map map;
- rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
+ rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
return 1;
}
- nested_vmcb = map.hva;
+ vmcb12 = map.hva;
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
- svm->nested.vmcb = 0;
+ svm->nested.vmcb12_gpa = 0;
WARN_ON_ONCE(svm->nested.nested_run_pending);
/* in case we halted in L2 */
@@ -587,45 +600,45 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
/* Give the current vmcb to the guest */
- nested_vmcb->save.es = vmcb->save.es;
- nested_vmcb->save.cs = vmcb->save.cs;
- nested_vmcb->save.ss = vmcb->save.ss;
- nested_vmcb->save.ds = vmcb->save.ds;
- nested_vmcb->save.gdtr = vmcb->save.gdtr;
- nested_vmcb->save.idtr = vmcb->save.idtr;
- nested_vmcb->save.efer = svm->vcpu.arch.efer;
- nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
- nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
- nested_vmcb->save.cr2 = vmcb->save.cr2;
- nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
- nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
- nested_vmcb->save.rip = kvm_rip_read(&svm->vcpu);
- nested_vmcb->save.rsp = kvm_rsp_read(&svm->vcpu);
- nested_vmcb->save.rax = kvm_rax_read(&svm->vcpu);
- nested_vmcb->save.dr7 = vmcb->save.dr7;
- nested_vmcb->save.dr6 = svm->vcpu.arch.dr6;
- nested_vmcb->save.cpl = vmcb->save.cpl;
-
- nested_vmcb->control.int_state = vmcb->control.int_state;
- nested_vmcb->control.exit_code = vmcb->control.exit_code;
- nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
- nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
- nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
-
- if (nested_vmcb->control.exit_code != SVM_EXIT_ERR)
- nested_vmcb_save_pending_event(svm, nested_vmcb);
+ vmcb12->save.es = vmcb->save.es;
+ vmcb12->save.cs = vmcb->save.cs;
+ vmcb12->save.ss = vmcb->save.ss;
+ vmcb12->save.ds = vmcb->save.ds;
+ vmcb12->save.gdtr = vmcb->save.gdtr;
+ vmcb12->save.idtr = vmcb->save.idtr;
+ vmcb12->save.efer = svm->vcpu.arch.efer;
+ vmcb12->save.cr0 = kvm_read_cr0(&svm->vcpu);
+ vmcb12->save.cr3 = kvm_read_cr3(&svm->vcpu);
+ vmcb12->save.cr2 = vmcb->save.cr2;
+ vmcb12->save.cr4 = svm->vcpu.arch.cr4;
+ vmcb12->save.rflags = kvm_get_rflags(&svm->vcpu);
+ vmcb12->save.rip = kvm_rip_read(&svm->vcpu);
+ vmcb12->save.rsp = kvm_rsp_read(&svm->vcpu);
+ vmcb12->save.rax = kvm_rax_read(&svm->vcpu);
+ vmcb12->save.dr7 = vmcb->save.dr7;
+ vmcb12->save.dr6 = svm->vcpu.arch.dr6;
+ vmcb12->save.cpl = vmcb->save.cpl;
+
+ vmcb12->control.int_state = vmcb->control.int_state;
+ vmcb12->control.exit_code = vmcb->control.exit_code;
+ vmcb12->control.exit_code_hi = vmcb->control.exit_code_hi;
+ vmcb12->control.exit_info_1 = vmcb->control.exit_info_1;
+ vmcb12->control.exit_info_2 = vmcb->control.exit_info_2;
+
+ if (vmcb12->control.exit_code != SVM_EXIT_ERR)
+ nested_vmcb_save_pending_event(svm, vmcb12);
if (svm->nrips_enabled)
- nested_vmcb->control.next_rip = vmcb->control.next_rip;
+ vmcb12->control.next_rip = vmcb->control.next_rip;
- nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl;
- nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
- nested_vmcb->control.event_inj = svm->nested.ctl.event_inj;
- nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err;
+ vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
+ vmcb12->control.tlb_ctl = svm->nested.ctl.tlb_ctl;
+ vmcb12->control.event_inj = svm->nested.ctl.event_inj;
+ vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
- nested_vmcb->control.pause_filter_count =
+ vmcb12->control.pause_filter_count =
svm->vmcb->control.pause_filter_count;
- nested_vmcb->control.pause_filter_thresh =
+ vmcb12->control.pause_filter_thresh =
svm->vmcb->control.pause_filter_thresh;
/* Restore the original control entries */
@@ -659,11 +672,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb_mark_all_dirty(svm->vmcb);
- trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code,
- nested_vmcb->control.exit_info_1,
- nested_vmcb->control.exit_info_2,
- nested_vmcb->control.exit_int_info,
- nested_vmcb->control.exit_int_info_err,
+ trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+ vmcb12->control.exit_info_1,
+ vmcb12->control.exit_info_2,
+ vmcb12->control.exit_int_info,
+ vmcb12->control.exit_int_info_err,
KVM_ISA_SVM);
kvm_vcpu_unmap(&svm->vcpu, &map, true);
@@ -688,6 +701,45 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
return 0;
}
+int svm_allocate_nested(struct vcpu_svm *svm)
+{
+ struct page *hsave_page;
+
+ if (svm->nested.initialized)
+ return 0;
+
+ hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!hsave_page)
+ return -ENOMEM;
+ svm->nested.hsave = page_address(hsave_page);
+
+ svm->nested.msrpm = svm_vcpu_alloc_msrpm();
+ if (!svm->nested.msrpm)
+ goto err_free_hsave;
+ svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
+
+ svm->nested.initialized = true;
+ return 0;
+
+err_free_hsave:
+ __free_page(hsave_page);
+ return -ENOMEM;
+}
+
+void svm_free_nested(struct vcpu_svm *svm)
+{
+ if (!svm->nested.initialized)
+ return;
+
+ svm_vcpu_free_msrpm(svm->nested.msrpm);
+ svm->nested.msrpm = NULL;
+
+ __free_page(virt_to_page(svm->nested.hsave));
+ svm->nested.hsave = NULL;
+
+ svm->nested.initialized = false;
+}
+
/*
* Forcibly leave nested mode in order to be able to reset the VCPU later on.
*/
@@ -702,6 +754,8 @@ void svm_leave_nested(struct vcpu_svm *svm)
copy_vmcb_control_area(&vmcb->control, &hsave->control);
nested_svm_uninit_mmu_context(&svm->vcpu);
}
+
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
}
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
@@ -709,7 +763,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
u32 offset, msr, value;
int write, mask;
- if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
return NESTED_EXIT_HOST;
msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
@@ -736,7 +790,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
u8 start_bit;
u64 gpa;
- if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
+ if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
return NESTED_EXIT_HOST;
port = svm->vmcb->control.exit_info_1 >> 16;
@@ -767,14 +821,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
vmexit = nested_svm_intercept_ioio(svm);
break;
case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
- u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
- if (svm->nested.ctl.intercept_cr & bit)
+ if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
- u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
- if (svm->nested.ctl.intercept_dr & bit)
+ if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
@@ -792,8 +844,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
break;
}
default: {
- u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- if (svm->nested.ctl.intercept & exit_bits)
+ if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
}
}
@@ -833,7 +884,7 @@ static bool nested_exit_on_exception(struct vcpu_svm *svm)
{
unsigned int nr = svm->vcpu.arch.exception.nr;
- return (svm->nested.ctl.intercept_exceptions & (1 << nr));
+ return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr));
}
static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
@@ -901,7 +952,7 @@ static void nested_svm_intr(struct vcpu_svm *svm)
static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
- return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INIT));
+ return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
}
static void nested_svm_init(struct vcpu_svm *svm)
@@ -982,7 +1033,8 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (get_host_vmcb(svm)->control.intercept_exceptions & excp_bits)
+ if (get_host_vmcb(svm)->control.intercepts[INTERCEPT_EXCEPTION] &
+ excp_bits)
return NESTED_EXIT_HOST;
else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
svm->vcpu.arch.apf.host_apf_flags)
@@ -1020,7 +1072,7 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
/* First fill in the header and copy it out. */
if (is_guest_mode(vcpu)) {
- kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb;
+ kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
@@ -1062,10 +1114,14 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
struct vmcb *hsave = svm->nested.hsave;
struct vmcb __user *user_vmcb = (struct vmcb __user *)
&user_kvm_nested_state->data.svm[0];
- struct vmcb_control_area ctl;
- struct vmcb_save_area save;
+ struct vmcb_control_area *ctl;
+ struct vmcb_save_area *save;
+ int ret;
u32 cr0;
+ BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
+ KVM_STATE_NESTED_SVM_VMCB_SIZE);
+
if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
return -EINVAL;
@@ -1090,20 +1146,30 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
svm_leave_nested(svm);
- goto out_set_gif;
+ svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+ return 0;
}
if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
return -EINVAL;
if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
return -EINVAL;
- if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl)))
- return -EFAULT;
- if (copy_from_user(&save, &user_vmcb->save, sizeof(save)))
- return -EFAULT;
- if (!nested_vmcb_check_controls(&ctl))
- return -EINVAL;
+ ret = -ENOMEM;
+ ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+ save = kzalloc(sizeof(*save), GFP_KERNEL);
+ if (!ctl || !save)
+ goto out_free;
+
+ ret = -EFAULT;
+ if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
+ goto out_free;
+ if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
+ goto out_free;
+
+ ret = -EINVAL;
+ if (!nested_vmcb_check_controls(ctl))
+ goto out_free;
/*
* Processor state contains L2 state. Check that it is
@@ -1111,15 +1177,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
*/
cr0 = kvm_read_cr0(vcpu);
if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
- return -EINVAL;
+ goto out_free;
/*
* Validate host state saved from before VMRUN (see
* nested_svm_check_permissions).
* TODO: validate reserved bits for all saved state.
*/
- if (!(save.cr0 & X86_CR0_PG))
- return -EINVAL;
+ if (!(save->cr0 & X86_CR0_PG))
+ goto out_free;
/*
* All checks done, we can enter guest mode. L1 control fields
@@ -1128,22 +1194,24 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* contains saved L1 state.
*/
copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
- hsave->save = save;
+ hsave->save = *save;
- svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa;
- load_nested_vmcb_control(svm, &ctl);
+ svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
+ load_nested_vmcb_control(svm, ctl);
nested_prepare_vmcb_control(svm);
- if (!nested_svm_vmrun_msrpm(svm))
- return -EINVAL;
+ kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+ ret = 0;
+out_free:
+ kfree(save);
+ kfree(ctl);
-out_set_gif:
- svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
- return 0;
+ return ret;
}
struct kvm_x86_nested_ops svm_nested_ops = {
.check_events = svm_check_nested_events,
+ .get_nested_state_pages = svm_get_nested_state_pages,
.get_state = svm_get_nested_state,
.set_state = svm_set_nested_state,
};
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 7bf7bf734979..c0b14106258a 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
uint8_t *page_virtual;
unsigned long i;
- if (npages == 0 || pages == NULL)
+ if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
+ pages == NULL)
return;
for (i = 0; i < npages; i++) {
@@ -446,10 +447,8 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
}
/*
- * The LAUNCH_UPDATE command will perform in-place encryption of the
- * memory content (i.e it will write the same memory region with C=1).
- * It's possible that the cache may contain the data with C=0, i.e.,
- * unencrypted so invalidate it first.
+ * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
+ * place; the cache may contain the data that was written unencrypted.
*/
sev_clflush_pages(inpages, npages);
@@ -805,10 +804,9 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
}
/*
- * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
- * memory content (i.e it will write the same memory region with C=1).
- * It's possible that the cache may contain the data with C=0, i.e.,
- * unencrypted so invalidate it first.
+ * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
+ * the pages; flush the destination too so that future accesses do not
+ * see stale data.
*/
sev_clflush_pages(src_p, 1);
sev_clflush_pages(dst_p, 1);
@@ -856,7 +854,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
struct kvm_sev_launch_secret params;
struct page **pages;
void *blob, *hdr;
- unsigned long n;
+ unsigned long n, i;
int ret, offset;
if (!sev_guest(kvm))
@@ -870,6 +868,12 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
return PTR_ERR(pages);
/*
+ * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
+ * place; the cache may contain the data that was written unencrypted.
+ */
+ sev_clflush_pages(pages, n);
+
+ /*
* The secret must be copied into contiguous memory region, lets verify
* that userspace memory pages are contiguous before we issue command.
*/
@@ -914,6 +918,11 @@ e_free_blob:
e_free:
kfree(data);
e_unpin_memory:
+ /* content of memory is updated, mark pages dirty */
+ for (i = 0; i < n; i++) {
+ set_page_dirty_lock(pages[i]);
+ mark_page_accessed(pages[i]);
+ }
sev_unpin_memory(kvm, pages, n);
return ret;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c44f3e9140d5..2f32fd09e259 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -19,7 +19,7 @@
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
#include <linux/psp-sev.h>
#include <linux/file.h>
#include <linux/pagemap.h>
@@ -91,7 +91,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
static const struct svm_direct_access_msrs {
u32 index; /* Index of the MSR */
bool always; /* True if intercept is always on */
-} direct_access_msrs[] = {
+} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
{ .index = MSR_STAR, .always = true },
{ .index = MSR_IA32_SYSENTER_CS, .always = true },
#ifdef CONFIG_X86_64
@@ -263,9 +263,10 @@ static int get_max_npt_level(void)
#endif
}
-void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ u64 old_efer = vcpu->arch.efer;
vcpu->arch.efer = efer;
if (!npt_enabled) {
@@ -276,13 +277,32 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
efer &= ~EFER_LME;
}
- if (!(efer & EFER_SVME)) {
- svm_leave_nested(svm);
- svm_set_gif(svm, true);
+ if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
+ if (!(efer & EFER_SVME)) {
+ svm_leave_nested(svm);
+ svm_set_gif(svm, true);
+
+ /*
+ * Free the nested guest state, unless we are in SMM.
+ * In this case we will return to the nested guest
+ * as soon as we leave SMM.
+ */
+ if (!is_smm(&svm->vcpu))
+ svm_free_nested(svm);
+
+ } else {
+ int ret = svm_allocate_nested(svm);
+
+ if (ret) {
+ vcpu->arch.efer = old_efer;
+ return ret;
+ }
+ }
}
svm->vmcb->save.efer = efer | EFER_SVME;
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
+ return 0;
}
static int is_external_interrupt(u32 info)
@@ -553,18 +573,44 @@ free_cpu_data:
}
-static bool valid_msr_intercept(u32 index)
+static int direct_access_msr_slot(u32 msr)
{
- int i;
+ u32 i;
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
- if (direct_access_msrs[i].index == index)
- return true;
+ if (direct_access_msrs[i].index == msr)
+ return i;
- return false;
+ return -ENOENT;
}
-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
+static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read,
+ int write)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int slot = direct_access_msr_slot(msr);
+
+ if (slot == -ENOENT)
+ return;
+
+ /* Set the shadow bitmaps to the desired intercept states */
+ if (read)
+ set_bit(slot, svm->shadow_msr_intercept.read);
+ else
+ clear_bit(slot, svm->shadow_msr_intercept.read);
+
+ if (write)
+ set_bit(slot, svm->shadow_msr_intercept.write);
+ else
+ clear_bit(slot, svm->shadow_msr_intercept.write);
+}
+
+static bool valid_msr_intercept(u32 index)
+{
+ return direct_access_msr_slot(index) != -ENOENT;
+}
+
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
{
u8 bit_write;
unsigned long tmp;
@@ -583,8 +629,8 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
return !!test_bit(bit_write, &tmp);
}
-static void set_msr_interception(u32 *msrpm, unsigned msr,
- int read, int write)
+static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
+ u32 msr, int read, int write)
{
u8 bit_read, bit_write;
unsigned long tmp;
@@ -596,6 +642,13 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
*/
WARN_ON(!valid_msr_intercept(msr));
+ /* Enforce non allowed MSRs to trap */
+ if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
+ read = 0;
+
+ if (write && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE))
+ write = 0;
+
offset = svm_msrpm_offset(msr);
bit_read = 2 * (msr & 0x0f);
bit_write = 2 * (msr & 0x0f) + 1;
@@ -609,17 +662,60 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
msrpm[offset] = tmp;
}
-static void svm_vcpu_init_msrpm(u32 *msrpm)
+static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+ int read, int write)
{
- int i;
+ set_shadow_msr_intercept(vcpu, msr, read, write);
+ set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
+}
+u32 *svm_vcpu_alloc_msrpm(void)
+{
+ struct page *pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
+ u32 *msrpm;
+
+ if (!pages)
+ return NULL;
+
+ msrpm = page_address(pages);
memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
+ return msrpm;
+}
+
+void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm)
+{
+ int i;
+
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
if (!direct_access_msrs[i].always)
continue;
+ set_msr_interception(vcpu, msrpm, direct_access_msrs[i].index, 1, 1);
+ }
+}
+
- set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
+void svm_vcpu_free_msrpm(u32 *msrpm)
+{
+ __free_pages(virt_to_page(msrpm), MSRPM_ALLOC_ORDER);
+}
+
+static void svm_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u32 i;
+
+ /*
+ * Set intercept permissions for all direct access MSRs again. They
+ * will automatically get filtered through the MSR filter, so we are
+ * back in sync after this.
+ */
+ for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
+ u32 msr = direct_access_msrs[i].index;
+ u32 read = test_bit(i, svm->shadow_msr_intercept.read);
+ u32 write = test_bit(i, svm->shadow_msr_intercept.write);
+
+ set_msr_interception_bitmap(vcpu, svm->msrpm, msr, read, write);
}
}
@@ -666,26 +762,26 @@ static void init_msrpm_offsets(void)
}
}
-static void svm_enable_lbrv(struct vcpu_svm *svm)
+static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
- u32 *msrpm = svm->msrpm;
+ struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
- set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
- set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
- set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
- set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}
-static void svm_disable_lbrv(struct vcpu_svm *svm)
+static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
{
- u32 *msrpm = svm->msrpm;
+ struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
- set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
- set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
- set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
- set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
void disable_nmi_singlestep(struct vcpu_svm *svm)
@@ -813,6 +909,9 @@ static __init void svm_set_cpu_caps(void)
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+
+ /* Enable INVPCID feature */
+ kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
}
static __init int svm_hardware_setup(void)
@@ -985,6 +1084,21 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
return svm->vmcb->control.tsc_offset;
}
+static void svm_check_invpcid(struct vcpu_svm *svm)
+{
+ /*
+ * Intercept INVPCID instruction only if shadow page table is
+ * enabled. Interception is not required with nested page table
+ * enabled.
+ */
+ if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
+ if (!npt_enabled)
+ svm_set_intercept(svm, INTERCEPT_INVPCID);
+ else
+ svm_clr_intercept(svm, INTERCEPT_INVPCID);
+ }
+}
+
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -992,14 +1106,14 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->vcpu.arch.hflags = 0;
- set_cr_intercept(svm, INTERCEPT_CR0_READ);
- set_cr_intercept(svm, INTERCEPT_CR3_READ);
- set_cr_intercept(svm, INTERCEPT_CR4_READ);
- set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
- set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
- set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
+ svm_set_intercept(svm, INTERCEPT_CR0_READ);
+ svm_set_intercept(svm, INTERCEPT_CR3_READ);
+ svm_set_intercept(svm, INTERCEPT_CR4_READ);
+ svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
+ svm_set_intercept(svm, INTERCEPT_CR3_WRITE);
+ svm_set_intercept(svm, INTERCEPT_CR4_WRITE);
if (!kvm_vcpu_apicv_active(&svm->vcpu))
- set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+ svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
set_dr_intercepts(svm);
@@ -1094,15 +1208,15 @@ static void init_vmcb(struct vcpu_svm *svm)
control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
svm_clr_intercept(svm, INTERCEPT_INVLPG);
clr_exception_intercept(svm, PF_VECTOR);
- clr_cr_intercept(svm, INTERCEPT_CR3_READ);
- clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
+ svm_clr_intercept(svm, INTERCEPT_CR3_READ);
+ svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
save->g_pat = svm->vcpu.arch.pat;
save->cr3 = 0;
save->cr4 = 0;
}
svm->asid_generation = 0;
- svm->nested.vmcb = 0;
+ svm->nested.vmcb12_gpa = 0;
svm->vcpu.arch.hflags = 0;
if (!kvm_pause_in_guest(svm->vcpu.kvm)) {
@@ -1114,6 +1228,8 @@ static void init_vmcb(struct vcpu_svm *svm)
svm_clr_intercept(svm, INTERCEPT_PAUSE);
}
+ svm_check_invpcid(svm);
+
if (kvm_vcpu_apicv_active(&svm->vcpu))
avic_init_vmcb(svm);
@@ -1171,35 +1287,20 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static int svm_create_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
- struct page *page;
- struct page *msrpm_pages;
- struct page *hsave_page;
- struct page *nested_msrpm_pages;
+ struct page *vmcb_page;
int err;
BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
svm = to_svm(vcpu);
err = -ENOMEM;
- page = alloc_page(GFP_KERNEL_ACCOUNT);
- if (!page)
+ vmcb_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ if (!vmcb_page)
goto out;
- msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
- if (!msrpm_pages)
- goto free_page1;
-
- nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
- if (!nested_msrpm_pages)
- goto free_page2;
-
- hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
- if (!hsave_page)
- goto free_page3;
-
err = avic_init_vcpu(svm);
if (err)
- goto free_page4;
+ goto error_free_vmcb_page;
/* We initialize this flag to true to make sure that the is_running
* bit would be set the first time the vcpu is loaded.
@@ -1207,18 +1308,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
svm->avic_is_running = true;
- svm->nested.hsave = page_address(hsave_page);
- clear_page(svm->nested.hsave);
+ svm->msrpm = svm_vcpu_alloc_msrpm();
+ if (!svm->msrpm)
+ goto error_free_vmcb_page;
- svm->msrpm = page_address(msrpm_pages);
- svm_vcpu_init_msrpm(svm->msrpm);
+ svm_vcpu_init_msrpm(vcpu, svm->msrpm);
- svm->nested.msrpm = page_address(nested_msrpm_pages);
- svm_vcpu_init_msrpm(svm->nested.msrpm);
-
- svm->vmcb = page_address(page);
- clear_page(svm->vmcb);
- svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
+ svm->vmcb = page_address(vmcb_page);
+ svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
svm->asid_generation = 0;
init_vmcb(svm);
@@ -1227,14 +1324,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
return 0;
-free_page4:
- __free_page(hsave_page);
-free_page3:
- __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
-free_page2:
- __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
-free_page1:
- __free_page(page);
+error_free_vmcb_page:
+ __free_page(vmcb_page);
out:
return err;
}
@@ -1258,10 +1349,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
*/
svm_clear_current_vmcb(svm->vmcb);
+ svm_free_nested(svm);
+
__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
- __free_page(virt_to_page(svm->nested.hsave));
- __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1549,11 +1640,11 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
vmcb_mark_dirty(svm->vmcb, VMCB_CR);
if (gcr0 == *hcr0) {
- clr_cr_intercept(svm, INTERCEPT_CR0_READ);
- clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
+ svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+ svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
} else {
- set_cr_intercept(svm, INTERCEPT_CR0_READ);
- set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
+ svm_set_intercept(svm, INTERCEPT_CR0_READ);
+ svm_set_intercept(svm, INTERCEPT_CR0_WRITE);
}
}
@@ -2183,6 +2274,12 @@ static int iret_interception(struct vcpu_svm *svm)
return 1;
}
+static int invd_interception(struct vcpu_svm *svm)
+{
+ /* Treat an INVD instruction as a NOP and just skip it. */
+ return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -2218,12 +2315,9 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
{
unsigned long cr0 = svm->vcpu.arch.cr0;
bool ret = false;
- u64 intercept;
-
- intercept = svm->nested.ctl.intercept;
if (!is_guest_mode(&svm->vcpu) ||
- (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
+ (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
return false;
cr0 &= ~SVM_CR0_SELECTIVE_MASK;
@@ -2261,6 +2355,7 @@ static int cr_interception(struct vcpu_svm *svm)
if (cr >= 16) { /* mov to cr */
cr -= 16;
val = kvm_register_read(&svm->vcpu, reg);
+ trace_kvm_cr_write(cr, val);
switch (cr) {
case 0:
if (!check_selective_cr0_intercepted(svm, val))
@@ -2306,6 +2401,7 @@ static int cr_interception(struct vcpu_svm *svm)
return 1;
}
kvm_register_write(&svm->vcpu, reg, val);
+ trace_kvm_cr_read(cr, val);
}
return kvm_complete_insn_gp(&svm->vcpu, err);
}
@@ -2556,7 +2652,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* We update the L1 MSR bit as well since it will end up
* touching the MSR anyway now.
*/
- set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
break;
case MSR_IA32_PRED_CMD:
if (!msr->host_initiated &&
@@ -2571,7 +2667,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
- set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr->host_initiated &&
@@ -2635,9 +2731,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->vmcb->save.dbgctl = data;
vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
if (data & (1ULL<<0))
- svm_enable_lbrv(svm);
+ svm_enable_lbrv(vcpu);
else
- svm_disable_lbrv(svm);
+ svm_disable_lbrv(vcpu);
break;
case MSR_VM_HSAVE_PA:
svm->nested.hsave_msr = data;
@@ -2733,6 +2829,33 @@ static int mwait_interception(struct vcpu_svm *svm)
return nop_interception(svm);
}
+static int invpcid_interception(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ unsigned long type;
+ gva_t gva;
+
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ /*
+ * For an INVPCID intercept:
+ * EXITINFO1 provides the linear address of the memory operand.
+ * EXITINFO2 provides the contents of the register operand.
+ */
+ type = svm->vmcb->control.exit_info_2;
+ gva = svm->vmcb->control.exit_info_1;
+
+ if (type > 3) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ return kvm_handle_invpcid(vcpu, type, gva);
+}
+
static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
@@ -2774,7 +2897,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_RDPMC] = rdpmc_interception,
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
- [SVM_EXIT_INVD] = emulate_on_interception,
+ [SVM_EXIT_INVD] = invd_interception,
[SVM_EXIT_PAUSE] = pause_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
@@ -2795,6 +2918,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_RDPRU] = rdpru_interception,
+ [SVM_EXIT_INVPCID] = invpcid_interception,
[SVM_EXIT_NPF] = npf_interception,
[SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
@@ -2813,12 +2937,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
}
pr_err("VMCB Control Area:\n");
- pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
- pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
- pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
- pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
- pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
- pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
+ pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
+ pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
+ pr_err("%-20s%04x\n", "dr_read:", control->intercepts[INTERCEPT_DR] & 0xffff);
+ pr_err("%-20s%04x\n", "dr_write:", control->intercepts[INTERCEPT_DR] >> 16);
+ pr_err("%-20s%08x\n", "exceptions:", control->intercepts[INTERCEPT_EXCEPTION]);
+ pr_err("%-20s%08x %08x\n", "intercepts:",
+ control->intercepts[INTERCEPT_WORD3],
+ control->intercepts[INTERCEPT_WORD4]);
pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
pr_err("%-20s%d\n", "pause filter threshold:",
control->pause_filter_thresh);
@@ -2917,12 +3043,19 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
"excp_to:", save->last_excp_to);
}
-static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+ u32 *intr_info, u32 *error_code)
{
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
*info1 = control->exit_info_1;
*info2 = control->exit_info_2;
+ *intr_info = control->exit_int_info;
+ if ((*intr_info & SVM_EXITINTINFO_VALID) &&
+ (*intr_info & SVM_EXITINTINFO_VALID_ERR))
+ *error_code = control->exit_int_info_err;
+ else
+ *error_code = 0;
}
static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
@@ -2933,7 +3066,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
- if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
+ if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
vcpu->arch.cr3 = svm->vmcb->save.cr3;
@@ -2941,12 +3074,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (is_guest_mode(vcpu)) {
int vmexit;
- trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
- svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2,
- svm->vmcb->control.exit_int_info,
- svm->vmcb->control.exit_int_info_err,
- KVM_ISA_SVM);
+ trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM);
vmexit = nested_svm_exit_special(svm);
@@ -3056,13 +3184,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
if (nested_svm_virtualize_tpr(vcpu))
return;
- clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+ svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
if (irr == -1)
return;
if (tpr >= irr)
- set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+ svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
}
bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
@@ -3250,7 +3378,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
if (nested_svm_virtualize_tpr(vcpu))
return;
- if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
+ if (!svm_is_intercept(svm, INTERCEPT_CR8_WRITE)) {
int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
kvm_set_cr8(vcpu, cr8);
}
@@ -3347,8 +3475,7 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
- if (!is_guest_mode(vcpu) &&
- to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
to_svm(vcpu)->vmcb->control.exit_info_1)
return handle_fastpath_set_msr_irqoff(vcpu);
@@ -3413,7 +3540,6 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
- fastpath_t exit_fastpath;
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
@@ -3454,9 +3580,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
kvm_load_guest_xsave_state(vcpu);
- if (lapic_in_kernel(vcpu) &&
- vcpu->arch.apic->lapic_timer.timer_advance_ns)
- kvm_wait_lapic_expire(vcpu);
+ kvm_wait_lapic_expire(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -3536,8 +3660,11 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
svm_handle_mce(svm);
svm_complete_interrupts(svm);
- exit_fastpath = svm_exit_handlers_fastpath(vcpu);
- return exit_fastpath;
+
+ if (is_guest_mode(vcpu))
+ return EXIT_FASTPATH_NONE;
+
+ return svm_exit_handlers_fastpath(vcpu);
}
static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,
@@ -3623,6 +3750,9 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
+ /* Check again if INVPCID interception if required */
+ svm_check_invpcid(svm);
+
if (!kvm_vcpu_apicv_active(vcpu))
return;
@@ -3737,7 +3867,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
break;
case SVM_EXIT_WRITE_CR0: {
unsigned long cr0, val;
- u64 intercept;
if (info->intercept == x86_intercept_cr_write)
icpt_info.exit_code += info->modrm_reg;
@@ -3746,9 +3875,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
info->intercept == x86_intercept_clts)
break;
- intercept = svm->nested.ctl.intercept;
-
- if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
+ if (!(vmcb_is_intercept(&svm->nested.ctl,
+ INTERCEPT_SELECTIVE_CR0)))
break;
cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
@@ -3883,7 +4011,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
/* FED8h - SVM Guest */
put_smstate(u64, smstate, 0x7ed8, 1);
/* FEE0h - SVM Guest VMCB Physical Address */
- put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
+ put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -3905,7 +4033,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
- u64 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
+ u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
if (guest) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -3915,10 +4043,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
return 1;
if (kvm_vcpu_map(&svm->vcpu,
- gpa_to_gfn(vmcb), &map) == -EINVAL)
+ gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+ return 1;
+
+ if (svm_allocate_nested(svm))
return 1;
- ret = enter_svm_guest_mode(svm, vmcb, map.hva);
+ ret = enter_svm_guest_mode(svm, vmcb12_gpa, map.hva);
kvm_vcpu_unmap(&svm->vcpu, &map, true);
}
}
@@ -3939,19 +4070,10 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
}
}
-static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
{
- unsigned long cr4 = kvm_read_cr4(vcpu);
- bool smep = cr4 & X86_CR4_SMEP;
- bool smap = cr4 & X86_CR4_SMAP;
- bool is_user = svm_get_cpl(vcpu) == 3;
-
- /*
- * If RIP is invalid, go ahead with emulation which will cause an
- * internal error exit.
- */
- if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
- return true;
+ bool smep, smap, is_user;
+ unsigned long cr4;
/*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
@@ -3993,6 +4115,20 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
* instruction pointer so we will not able to workaround it. Lets
* print the error and request to kill the guest.
*/
+ if (likely(!insn || insn_len))
+ return true;
+
+ /*
+ * If RIP is invalid, go ahead with emulation which will cause an
+ * internal error exit.
+ */
+ if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
+ return true;
+
+ cr4 = kvm_read_cr4(vcpu);
+ smep = cr4 & X86_CR4_SMEP;
+ smap = cr4 & X86_CR4_SMAP;
+ is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) {
if (!sev_guest(vcpu->kvm))
return true;
@@ -4016,7 +4152,7 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
* if an INIT signal is pending.
*/
return !gif_set(svm) ||
- (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
+ (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
}
static void svm_vm_destroy(struct kvm *kvm)
@@ -4154,9 +4290,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
- .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
+ .can_emulate_instruction = svm_can_emulate_instruction,
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
+
+ .msr_filter_changed = svm_msr_filter_changed,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
@@ -4170,6 +4308,8 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
static int __init svm_init(void)
{
+ __unused_size_checks();
+
return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm),
__alignof__(struct vcpu_svm), THIS_MODULE);
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index a798e1731709..1d853fe4c778 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -31,6 +31,7 @@ static const u32 host_save_user_msrs[] = {
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+#define MAX_DIRECT_ACCESS_MSRS 15
#define MSRPM_OFFSETS 16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -85,8 +86,7 @@ struct svm_nested_state {
struct vmcb *hsave;
u64 hsave_msr;
u64 vm_cr_msr;
- u64 vmcb;
- u32 host_intercept_exceptions;
+ u64 vmcb12_gpa;
/* These are the merged vectors */
u32 *msrpm;
@@ -97,6 +97,8 @@ struct svm_nested_state {
/* cache for control fields of the guest */
struct vmcb_control_area ctl;
+
+ bool initialized;
};
struct vcpu_svm {
@@ -158,6 +160,12 @@ struct vcpu_svm {
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
+
+ /* Save desired MSR intercept (read: pass-through) state */
+ struct {
+ DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
+ DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
+ } shadow_msr_intercept;
};
struct svm_cpu_data {
@@ -214,51 +222,44 @@ static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
return svm->vmcb;
}
-static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
+static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_cr |= (1U << bit);
-
- recalc_intercepts(svm);
+ WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
+ __set_bit(bit, (unsigned long *)&control->intercepts);
}
-static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
+static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_cr &= ~(1U << bit);
-
- recalc_intercepts(svm);
+ WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
+ __clear_bit(bit, (unsigned long *)&control->intercepts);
}
-static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
+static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- return vmcb->control.intercept_cr & (1U << bit);
+ WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
+ return test_bit(bit, (unsigned long *)&control->intercepts);
}
static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
- | (1 << INTERCEPT_DR1_READ)
- | (1 << INTERCEPT_DR2_READ)
- | (1 << INTERCEPT_DR3_READ)
- | (1 << INTERCEPT_DR4_READ)
- | (1 << INTERCEPT_DR5_READ)
- | (1 << INTERCEPT_DR6_READ)
- | (1 << INTERCEPT_DR7_READ)
- | (1 << INTERCEPT_DR0_WRITE)
- | (1 << INTERCEPT_DR1_WRITE)
- | (1 << INTERCEPT_DR2_WRITE)
- | (1 << INTERCEPT_DR3_WRITE)
- | (1 << INTERCEPT_DR4_WRITE)
- | (1 << INTERCEPT_DR5_WRITE)
- | (1 << INTERCEPT_DR6_WRITE)
- | (1 << INTERCEPT_DR7_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
}
@@ -267,25 +268,27 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr = 0;
+ vmcb->control.intercepts[INTERCEPT_DR] = 0;
recalc_intercepts(svm);
}
-static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
+static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_exceptions |= (1U << bit);
+ WARN_ON_ONCE(bit >= 32);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
recalc_intercepts(svm);
}
-static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
+static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_exceptions &= ~(1U << bit);
+ WARN_ON_ONCE(bit >= 32);
+ vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
recalc_intercepts(svm);
}
@@ -294,7 +297,7 @@ static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept |= (1ULL << bit);
+ vmcb_set_intercept(&vmcb->control, bit);
recalc_intercepts(svm);
}
@@ -303,14 +306,14 @@ static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept &= ~(1ULL << bit);
+ vmcb_clr_intercept(&vmcb->control, bit);
recalc_intercepts(svm);
}
static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
{
- return (svm->vmcb->control.intercept & (1ULL << bit)) != 0;
+ return vmcb_is_intercept(&svm->vmcb->control, bit);
}
static inline bool vgif_enabled(struct vcpu_svm *svm)
@@ -345,11 +348,15 @@ static inline bool gif_set(struct vcpu_svm *svm)
/* svm.c */
#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U
#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U
-#define MSR_CR3_LONG_RESERVED_MASK 0xfff0000000000fe7U
+#define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U
#define MSR_INVALID 0xffffffffU
u32 svm_msrpm_offset(u32 msr);
-void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
+u32 *svm_vcpu_alloc_msrpm(void);
+void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
+void svm_vcpu_free_msrpm(u32 *msrpm);
+
+int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -374,22 +381,24 @@ static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
{
- return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_SMI));
+ return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
}
static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
{
- return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INTR));
+ return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
}
static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
{
- return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI));
+ return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
}
int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
struct vmcb *nested_vmcb);
void svm_leave_nested(struct vcpu_svm *svm);
+void svm_free_nested(struct vcpu_svm *svm);
+int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct vcpu_svm *svm);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b66432b015d2..aef960f90f26 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -15,18 +15,20 @@
* Tracepoint for guest mode entry.
*/
TRACE_EVENT(kvm_entry,
- TP_PROTO(unsigned int vcpu_id),
- TP_ARGS(vcpu_id),
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
+ __field( unsigned long, rip )
),
TP_fast_assign(
- __entry->vcpu_id = vcpu_id;
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->rip = kvm_rip_read(vcpu);
),
- TP_printk("vcpu %u", __entry->vcpu_id)
+ TP_printk("vcpu %u, rip 0x%lx", __entry->vcpu_id, __entry->rip)
);
/*
@@ -233,36 +235,45 @@ TRACE_EVENT(kvm_apic,
(isa == KVM_ISA_VMX) ? \
__print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : ""
+#define TRACE_EVENT_KVM_EXIT(name) \
+TRACE_EVENT(name, \
+ TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa), \
+ TP_ARGS(exit_reason, vcpu, isa), \
+ \
+ TP_STRUCT__entry( \
+ __field( unsigned int, exit_reason ) \
+ __field( unsigned long, guest_rip ) \
+ __field( u32, isa ) \
+ __field( u64, info1 ) \
+ __field( u64, info2 ) \
+ __field( u32, intr_info ) \
+ __field( u32, error_code ) \
+ __field( unsigned int, vcpu_id ) \
+ ), \
+ \
+ TP_fast_assign( \
+ __entry->exit_reason = exit_reason; \
+ __entry->guest_rip = kvm_rip_read(vcpu); \
+ __entry->isa = isa; \
+ __entry->vcpu_id = vcpu->vcpu_id; \
+ kvm_x86_ops.get_exit_info(vcpu, &__entry->info1, \
+ &__entry->info2, \
+ &__entry->intr_info, \
+ &__entry->error_code); \
+ ), \
+ \
+ TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \
+ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \
+ __entry->vcpu_id, \
+ kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \
+ __entry->guest_rip, __entry->info1, __entry->info2, \
+ __entry->intr_info, __entry->error_code) \
+)
+
/*
* Tracepoint for kvm guest exit:
*/
-TRACE_EVENT(kvm_exit,
- TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa),
- TP_ARGS(exit_reason, vcpu, isa),
-
- TP_STRUCT__entry(
- __field( unsigned int, exit_reason )
- __field( unsigned long, guest_rip )
- __field( u32, isa )
- __field( u64, info1 )
- __field( u64, info2 )
- __field( unsigned int, vcpu_id )
- ),
-
- TP_fast_assign(
- __entry->exit_reason = exit_reason;
- __entry->guest_rip = kvm_rip_read(vcpu);
- __entry->isa = isa;
- __entry->vcpu_id = vcpu->vcpu_id;
- kvm_x86_ops.get_exit_info(vcpu, &__entry->info1,
- &__entry->info2);
- ),
-
- TP_printk("vcpu %u reason %s%s%s rip 0x%lx info %llx %llx",
- __entry->vcpu_id,
- kvm_print_exit_reason(__entry->exit_reason, __entry->isa),
- __entry->guest_rip, __entry->info1, __entry->info2)
-);
+TRACE_EVENT_KVM_EXIT(kvm_exit);
/*
* Tracepoint for kvm interrupt injection:
@@ -544,63 +555,38 @@ TRACE_EVENT(kvm_nested_vmrun,
);
TRACE_EVENT(kvm_nested_intercepts,
- TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept),
- TP_ARGS(cr_read, cr_write, exceptions, intercept),
+ TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions,
+ __u32 intercept1, __u32 intercept2, __u32 intercept3),
+ TP_ARGS(cr_read, cr_write, exceptions, intercept1,
+ intercept2, intercept3),
TP_STRUCT__entry(
__field( __u16, cr_read )
__field( __u16, cr_write )
__field( __u32, exceptions )
- __field( __u64, intercept )
+ __field( __u32, intercept1 )
+ __field( __u32, intercept2 )
+ __field( __u32, intercept3 )
),
TP_fast_assign(
__entry->cr_read = cr_read;
__entry->cr_write = cr_write;
__entry->exceptions = exceptions;
- __entry->intercept = intercept;
+ __entry->intercept1 = intercept1;
+ __entry->intercept2 = intercept2;
+ __entry->intercept3 = intercept3;
),
- TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx",
- __entry->cr_read, __entry->cr_write, __entry->exceptions,
- __entry->intercept)
+ TP_printk("cr_read: %04x cr_write: %04x excp: %08x "
+ "intercepts: %08x %08x %08x",
+ __entry->cr_read, __entry->cr_write, __entry->exceptions,
+ __entry->intercept1, __entry->intercept2, __entry->intercept3)
);
/*
* Tracepoint for #VMEXIT while nested
*/
-TRACE_EVENT(kvm_nested_vmexit,
- TP_PROTO(__u64 rip, __u32 exit_code,
- __u64 exit_info1, __u64 exit_info2,
- __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
- TP_ARGS(rip, exit_code, exit_info1, exit_info2,
- exit_int_info, exit_int_info_err, isa),
-
- TP_STRUCT__entry(
- __field( __u64, rip )
- __field( __u32, exit_code )
- __field( __u64, exit_info1 )
- __field( __u64, exit_info2 )
- __field( __u32, exit_int_info )
- __field( __u32, exit_int_info_err )
- __field( __u32, isa )
- ),
-
- TP_fast_assign(
- __entry->rip = rip;
- __entry->exit_code = exit_code;
- __entry->exit_info1 = exit_info1;
- __entry->exit_info2 = exit_info2;
- __entry->exit_int_info = exit_int_info;
- __entry->exit_int_info_err = exit_int_info_err;
- __entry->isa = isa;
- ),
- TP_printk("rip: 0x%016llx reason: %s%s%s ext_inf1: 0x%016llx "
- "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
- __entry->rip,
- kvm_print_exit_reason(__entry->exit_code, __entry->isa),
- __entry->exit_info1, __entry->exit_info2,
- __entry->exit_int_info, __entry->exit_int_info_err)
-);
+TRACE_EVENT_KVM_EXIT(kvm_nested_vmexit);
/*
* Tracepoint for #VMEXIT reinjected to the guest
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 4bbd8b448d22..3a1861403d73 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -151,7 +151,7 @@ static inline bool vmx_umip_emulated(void)
static inline bool cpu_has_vmx_rdtscp(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_RDTSCP;
+ SECONDARY_EXEC_ENABLE_RDTSCP;
}
static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
@@ -196,7 +196,7 @@ static inline bool cpu_has_vmx_ple(void)
SECONDARY_EXEC_PAUSE_LOOP_EXITING;
}
-static inline bool vmx_rdrand_supported(void)
+static inline bool cpu_has_vmx_rdrand(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_RDRAND_EXITING;
@@ -233,7 +233,7 @@ static inline bool cpu_has_vmx_encls_vmexit(void)
SECONDARY_EXEC_ENCLS_EXITING;
}
-static inline bool vmx_rdseed_supported(void)
+static inline bool cpu_has_vmx_rdseed(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_RDSEED_EXITING;
@@ -244,13 +244,13 @@ static inline bool cpu_has_vmx_pml(void)
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
}
-static inline bool vmx_xsaves_supported(void)
+static inline bool cpu_has_vmx_xsaves(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_XSAVES;
}
-static inline bool vmx_waitpkg_supported(void)
+static inline bool cpu_has_vmx_waitpkg(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index e5325bd0f304..f3199bb02f22 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -297,14 +297,13 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
};
const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
-void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
+__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
{
vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
-
}
#endif
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index e5f7a7ebf27d..bd41d9462355 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -185,7 +185,7 @@ static inline void evmcs_load(u64 phys_addr)
vp_ap->enlighten_vmentry = 1;
}
-void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
+__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
#else /* !IS_ENABLED(CONFIG_HYPERV) */
static inline void evmcs_write64(unsigned long field, u64 value) {}
static inline void evmcs_write32(unsigned long field, u32 value) {}
@@ -194,7 +194,6 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; }
static inline u32 evmcs_read32(unsigned long field) { return 0; }
static inline u16 evmcs_read16(unsigned long field) { return 0; }
static inline void evmcs_load(u64 phys_addr) {}
-static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1bb6b31eb646..89af692deb7e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/frame.h>
+#include <linux/objtool.h>
#include <linux/percpu.h>
#include <asm/debugreg.h>
@@ -233,6 +233,44 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
vmx->nested.hv_evmcs = NULL;
}
+static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
+ struct loaded_vmcs *prev)
+{
+ struct vmcs_host_state *dest, *src;
+
+ if (unlikely(!vmx->guest_state_loaded))
+ return;
+
+ src = &prev->host_state;
+ dest = &vmx->loaded_vmcs->host_state;
+
+ vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
+ dest->ldt_sel = src->ldt_sel;
+#ifdef CONFIG_X86_64
+ dest->ds_sel = src->ds_sel;
+ dest->es_sel = src->es_sel;
+#endif
+}
+
+static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct loaded_vmcs *prev;
+ int cpu;
+
+ if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
+ return;
+
+ cpu = get_cpu();
+ prev = vmx->loaded_vmcs;
+ vmx->loaded_vmcs = vmcs;
+ vmx_vcpu_load_vmcs(vcpu, cpu, prev);
+ vmx_sync_vmcs_host_state(vmx, prev);
+ put_cpu();
+
+ vmx_register_cache_reset(vcpu);
+}
+
/*
* Free whatever needs to be freed from vmx->nested when L1 goes down, or
* just stops using VMX.
@@ -241,10 +279,13 @@ static void free_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+
if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
- kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
vmx->nested.vmxon = false;
vmx->nested.smm.vmxon = false;
@@ -277,44 +318,6 @@ static void free_nested(struct kvm_vcpu *vcpu)
free_loaded_vmcs(&vmx->nested.vmcs02);
}
-static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
- struct loaded_vmcs *prev)
-{
- struct vmcs_host_state *dest, *src;
-
- if (unlikely(!vmx->guest_state_loaded))
- return;
-
- src = &prev->host_state;
- dest = &vmx->loaded_vmcs->host_state;
-
- vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
- dest->ldt_sel = src->ldt_sel;
-#ifdef CONFIG_X86_64
- dest->ds_sel = src->ds_sel;
- dest->es_sel = src->es_sel;
-#endif
-}
-
-static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct loaded_vmcs *prev;
- int cpu;
-
- if (vmx->loaded_vmcs == vmcs)
- return;
-
- cpu = get_cpu();
- prev = vmx->loaded_vmcs;
- vmx->loaded_vmcs = vmcs;
- vmx_vcpu_load_vmcs(vcpu, cpu, prev);
- vmx_sync_vmcs_host_state(vmx, prev);
- put_cpu();
-
- vmx_register_cache_reset(vcpu);
-}
-
/*
* Ensure that the current vmcs of the logical processor is the
* vmcs01 of the vcpu before calling free_nested().
@@ -323,8 +326,6 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
vmx_leave_nested(vcpu);
- vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
- free_nested(vcpu);
vcpu_put(vcpu);
}
@@ -938,11 +939,11 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
* VM-exit in L0, use the more accurate value.
*/
if (msr_index == MSR_IA32_TSC) {
- int index = vmx_find_msr_index(&vmx->msr_autostore.guest,
- MSR_IA32_TSC);
+ int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
+ MSR_IA32_TSC);
- if (index >= 0) {
- u64 val = vmx->msr_autostore.guest.val[index].value;
+ if (i >= 0) {
+ u64 val = vmx->msr_autostore.guest.val[i].value;
*data = kvm_read_l1_tsc(vcpu, val);
return true;
@@ -1031,16 +1032,16 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
bool in_vmcs12_store_list;
- int msr_autostore_index;
+ int msr_autostore_slot;
bool in_autostore_list;
int last;
- msr_autostore_index = vmx_find_msr_index(autostore, msr_index);
- in_autostore_list = msr_autostore_index >= 0;
+ msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
+ in_autostore_list = msr_autostore_slot >= 0;
in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
if (in_vmcs12_store_list && !in_autostore_list) {
- if (autostore->nr == NR_LOADSTORE_MSRS) {
+ if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
/*
* Emulated VMEntry does not fail here. Instead a less
* accurate value will be returned by
@@ -1057,7 +1058,7 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
autostore->val[last].index = msr_index;
} else if (!in_vmcs12_store_list && in_autostore_list) {
last = --autostore->nr;
- autostore->val[msr_autostore_index] = autostore->val[last];
+ autostore->val[msr_autostore_slot] = autostore->val[last];
}
}
@@ -2286,7 +2287,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
/* Take the following fields only from vmcs12 */
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_ENABLE_INVPCID |
- SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_ENABLE_RDTSCP |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -2314,6 +2315,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write16(GUEST_INTR_STATUS,
vmcs12->guest_intr_status);
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+ exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
secondary_exec_controls_set(vmx, exec_control);
}
@@ -2408,6 +2412,8 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+
+ vmx->segment_cache.bitmask = 0;
}
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
@@ -2571,7 +2577,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* which means L1 attempted VMEntry to L2 with invalid state.
* Fail the VMEntry.
*/
- if (vmx->emulation_required) {
+ if (CC(!vmx_guest_state_valid(vcpu))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
return -EINVAL;
}
@@ -3344,8 +3350,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
prepare_vmcs02_early(vmx, vmcs12);
if (from_vmentry) {
- if (unlikely(!nested_get_vmcs12_pages(vcpu)))
+ if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
+ }
if (nested_vmx_check_vmentry_hw(vcpu)) {
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
@@ -3387,7 +3395,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
* to nested_get_vmcs12_pages before the next VM-entry. The MSRs
* have already been set at vmentry time and should not be reset.
*/
- kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+ kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
}
/*
@@ -3468,11 +3476,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (evmptrld_status == EVMPTRLD_ERROR) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
- } else if (evmptrld_status == EVMPTRLD_VMFAIL) {
+ } else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
return nested_vmx_failInvalid(vcpu);
}
- if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
+ if (CC(!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull))
return nested_vmx_failInvalid(vcpu);
vmcs12 = get_vmcs12(vcpu);
@@ -3483,7 +3491,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* rather than RFLAGS.ZF, and no error number is stored to the
* VM-instruction error field.
*/
- if (vmcs12->hdr.shadow_vmcs)
+ if (CC(vmcs12->hdr.shadow_vmcs))
return nested_vmx_failInvalid(vcpu);
if (vmx->nested.hv_evmcs) {
@@ -3504,10 +3512,10 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* for misconfigurations which will anyway be caught by the processor
* when using the merged vmcs02.
*/
- if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)
+ if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
- if (vmcs12->launch_state == launch)
+ if (CC(vmcs12->launch_state == launch))
return nested_vmx_fail(vcpu,
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
@@ -3528,6 +3536,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (unlikely(status != NVMX_VMENTRY_SUCCESS))
goto vmentry_failed;
+ /* Emulate processing of posted interrupts on VM-Enter. */
+ if (nested_cpu_has_posted_intr(vmcs12) &&
+ kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
+ vmx->nested.pi_pending = true;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
+ }
+
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
@@ -4257,7 +4273,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
{
- struct shared_msr_entry *efer_msr;
+ struct vmx_uret_msr *efer_msr;
unsigned int i;
if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
@@ -4271,7 +4287,7 @@ static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
return vmx->msr_autoload.guest.val[i].value;
}
- efer_msr = find_msr_entry(vmx, MSR_EFER);
+ efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
if (efer_msr)
return efer_msr->data;
@@ -4696,7 +4712,7 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
r = kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e);
if (r != X86EMUL_CONTINUE) {
- *ret = vmx_handle_memory_failure(vcpu, r, &e);
+ *ret = kvm_handle_memory_failure(vcpu, r, &e);
return -EINVAL;
}
@@ -4760,7 +4776,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
if (vmx_pt_mode_is_host_guest()) {
vmx->pt_desc.guest.ctl = 0;
- pt_update_intercept_for_msr(vmx);
+ pt_update_intercept_for_msr(vcpu);
}
return 0;
@@ -5003,7 +5019,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
if (r != X86EMUL_CONTINUE)
- return vmx_handle_memory_failure(vcpu, r, &e);
+ return kvm_handle_memory_failure(vcpu, r, &e);
}
return nested_vmx_succeed(vcpu);
@@ -5076,7 +5092,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return 1;
r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
if (r != X86EMUL_CONTINUE)
- return vmx_handle_memory_failure(vcpu, r, &e);
+ return kvm_handle_memory_failure(vcpu, r, &e);
}
field = kvm_register_readl(vcpu, (((instr_info) >> 28) & 0xf));
@@ -5238,7 +5254,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
r = kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
sizeof(gpa_t), &e);
if (r != X86EMUL_CONTINUE)
- return vmx_handle_memory_failure(vcpu, r, &e);
+ return kvm_handle_memory_failure(vcpu, r, &e);
return nested_vmx_succeed(vcpu);
}
@@ -5291,7 +5307,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
if (r != X86EMUL_CONTINUE)
- return vmx_handle_memory_failure(vcpu, r, &e);
+ return kvm_handle_memory_failure(vcpu, r, &e);
/*
* Nested EPT roots are always held through guest_mmu,
@@ -5373,7 +5389,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return 1;
r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
if (r != X86EMUL_CONTINUE)
- return vmx_handle_memory_failure(vcpu, r, &e);
+ return kvm_handle_memory_failure(vcpu, r, &e);
if (operand.vpid >> 16)
return nested_vmx_fail(vcpu,
@@ -5918,13 +5934,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
goto reflect_vmexit;
}
- exit_intr_info = vmx_get_intr_info(vcpu);
- exit_qual = vmx_get_exit_qual(vcpu);
-
- trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, exit_qual,
- vmx->idt_vectoring_info, exit_intr_info,
- vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
- KVM_ISA_VMX);
+ trace_kvm_nested_vmexit(exit_reason, vcpu, KVM_ISA_VMX);
/* If L0 (KVM) wants the exit, it trumps L1's desires. */
if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
@@ -5940,14 +5950,14 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
* need to be synthesized by querying the in-kernel LAPIC, but external
* interrupts are never reflected to L1 so it's a non-issue.
*/
- if ((exit_intr_info &
- (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
- (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) {
+ exit_intr_info = vmx_get_intr_info(vcpu);
+ if (is_exception_with_error_code(exit_intr_info)) {
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
vmcs12->vm_exit_intr_error_code =
vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
}
+ exit_qual = vmx_get_exit_qual(vcpu);
reflect_vmexit:
nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual);
@@ -6182,7 +6192,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
* restored yet. EVMCS will be mapped from
* nested_get_vmcs12_pages().
*/
- kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+ kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
} else {
return -EINVAL;
}
@@ -6318,7 +6328,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
msrs->exit_ctls_high |=
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
@@ -6337,7 +6348,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
#ifdef CONFIG_X86_64
VM_ENTRY_IA32E_MODE |
#endif
- VM_ENTRY_LOAD_IA32_PAT;
+ VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
+ VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
@@ -6391,7 +6403,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
msrs->secondary_ctls_low = 0;
msrs->secondary_ctls_high &=
SECONDARY_EXEC_DESC |
- SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_ENABLE_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -6561,7 +6573,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.get_state = vmx_get_nested_state,
.set_state = vmx_set_nested_state,
- .get_vmcs12_pages = nested_get_vmcs12_pages,
+ .get_nested_state_pages = nested_get_vmcs12_pages,
.write_log_dirty = nested_vmx_write_pml_buffer,
.enable_evmcs = nested_enable_evmcs,
.get_evmcs_version = nested_get_evmcs_version,
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
new file mode 100644
index 000000000000..f02962dcc72c
--- /dev/null
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm_host.h>
+
+#include <asm/irq_remapping.h>
+#include <asm/cpu.h>
+
+#include "lapic.h"
+#include "posted_intr.h"
+#include "trace.h"
+#include "vmx.h"
+
+/*
+ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be waken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
+static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+ return &(to_vmx(vcpu)->pi_desc);
+}
+
+void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ /*
+ * In case of hot-plug or hot-unplug, we may have to undo
+ * vmx_vcpu_pi_put even if there is no assigned device. And we
+ * always keep PI.NDST up to date for simplicity: it makes the
+ * code easier, and CPU migration is not a fast path.
+ */
+ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+ return;
+
+ /*
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
+ * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
+ * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
+ * correctly.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
+ goto after_clear_sn;
+ }
+
+ /* The full case. */
+ do {
+ old.control = new.control = pi_desc->control;
+
+ dest = cpu_physical_id(cpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ new.sn = 0;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+after_clear_sn:
+
+ /*
+ * Clear SN before reading the bitmap. The VT-d firmware
+ * writes the bitmap and reads SN atomically (5.2.3 in the
+ * spec), so it doesn't really have a memory barrier that
+ * pairs with this, but we cannot do that and we need one.
+ */
+ smp_mb__after_atomic();
+
+ if (!pi_is_pir_empty(pi_desc))
+ pi_set_on(pi_desc);
+}
+
+void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(vcpu))
+ return;
+
+ /* Set SN when the vCPU is preempted */
+ if (vcpu->preempted)
+ pi_set_sn(pi_desc);
+}
+
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ do {
+ old.control = new.control = pi_desc->control;
+ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+ "Wakeup handler not enabled while the VCPU is blocked\n");
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ vcpu->pre_pcpu = -1;
+ }
+}
+
+/*
+ * This routine does the following things for vCPU which is going
+ * to be blocked if VT-d PI is enabled.
+ * - Store the vCPU to the wakeup list, so when interrupts happen
+ * we can find the right vCPU to wake up.
+ * - Change the Posted-interrupt descriptor as below:
+ * 'NDST' <-- vcpu->pre_pcpu
+ * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' is set during this process, which means at least one
+ * interrupt is posted for this vCPU, we cannot block it, in
+ * this case, return 1, otherwise, return 0.
+ *
+ */
+int pi_pre_block(struct kvm_vcpu *vcpu)
+{
+ unsigned int dest;
+ struct pi_desc old, new;
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(vcpu))
+ return 0;
+
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+ vcpu->pre_pcpu = vcpu->cpu;
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ }
+
+ do {
+ old.control = new.control = pi_desc->control;
+
+ WARN((pi_desc->sn == 1),
+ "Warning: SN field of posted-interrupts "
+ "is set before blocking\n");
+
+ /*
+ * Since vCPU can be preempted during this process,
+ * vcpu->cpu could be different with pre_pcpu, we
+ * need to set pre_pcpu as the destination of wakeup
+ * notification event, then we can find the right vCPU
+ * to wakeup in wakeup handler if interrupts happen
+ * when the vCPU is in blocked state.
+ */
+ dest = cpu_physical_id(vcpu->pre_pcpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'wakeup vector' */
+ new.nv = POSTED_INTR_WAKEUP_VECTOR;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+ /* We should not block the vCPU if an interrupt is posted for it. */
+ if (pi_test_on(pi_desc) == 1)
+ __pi_post_block(vcpu);
+
+ local_irq_enable();
+ return (vcpu->pre_pcpu == -1);
+}
+
+void pi_post_block(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->pre_pcpu == -1)
+ return;
+
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ __pi_post_block(vcpu);
+ local_irq_enable();
+}
+
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+void pi_wakeup_handler(void)
+{
+ struct kvm_vcpu *vcpu;
+ int cpu = smp_processor_id();
+
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+ list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+ blocked_vcpu_list) {
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ if (pi_test_on(pi_desc) == 1)
+ kvm_vcpu_kick(vcpu);
+ }
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
+void __init pi_init_cpu(int cpu)
+{
+ INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+ spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
+bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ return pi_test_on(pi_desc) ||
+ (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
+}
+
+
+/*
+ * pi_update_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
+ bool set)
+{
+ struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_irq_routing_table *irq_rt;
+ struct kvm_lapic_irq irq;
+ struct kvm_vcpu *vcpu;
+ struct vcpu_data vcpu_info;
+ int idx, ret = 0;
+
+ if (!kvm_arch_has_assigned_device(kvm) ||
+ !irq_remapping_cap(IRQ_POSTING_CAP) ||
+ !kvm_vcpu_apicv_active(kvm->vcpus[0]))
+ return 0;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ if (guest_irq >= irq_rt->nr_rt_entries ||
+ hlist_empty(&irq_rt->map[guest_irq])) {
+ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+ guest_irq, irq_rt->nr_rt_entries);
+ goto out;
+ }
+
+ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+ if (e->type != KVM_IRQ_ROUTING_MSI)
+ continue;
+ /*
+ * VT-d PI cannot support posting multicast/broadcast
+ * interrupts to a vCPU, we still use interrupt remapping
+ * for these kind of interrupts.
+ *
+ * For lowest-priority interrupts, we only support
+ * those with single CPU as the destination, e.g. user
+ * configures the interrupts via /proc/irq or uses
+ * irqbalance to make the interrupts single-CPU.
+ *
+ * We will support full lowest-priority interrupt later.
+ *
+ * In addition, we can only inject generic interrupts using
+ * the PI mechanism, refuse to route others through it.
+ */
+
+ kvm_set_msi_irq(kvm, e, &irq);
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+ !kvm_irq_is_postable(&irq)) {
+ /*
+ * Make sure the IRTE is in remapped mode if
+ * we don't handle it in posted mode.
+ */
+ ret = irq_set_vcpu_affinity(host_irq, NULL);
+ if (ret < 0) {
+ printk(KERN_INFO
+ "failed to back to remapped mode, irq: %u\n",
+ host_irq);
+ goto out;
+ }
+
+ continue;
+ }
+
+ vcpu_info.pi_desc_addr = __pa(&to_vmx(vcpu)->pi_desc);
+ vcpu_info.vector = irq.vector;
+
+ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
+ vcpu_info.vector, vcpu_info.pi_desc_addr, set);
+
+ if (set)
+ ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+ else
+ ret = irq_set_vcpu_affinity(host_irq, NULL);
+
+ if (ret < 0) {
+ printk(KERN_INFO "%s: failed to update PI IRTE\n",
+ __func__);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return ret;
+}
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
new file mode 100644
index 000000000000..0bdc41391c5b
--- /dev/null
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_POSTED_INTR_H
+#define __KVM_X86_VMX_POSTED_INTR_H
+
+#define POSTED_INTR_ON 0
+#define POSTED_INTR_SN 1
+
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+ u32 pir[8]; /* Posted interrupt requested */
+ union {
+ struct {
+ /* bit 256 - Outstanding Notification */
+ u16 on : 1,
+ /* bit 257 - Suppress Notification */
+ sn : 1,
+ /* bit 271:258 - Reserved */
+ rsvd_1 : 14;
+ /* bit 279:272 - Notification Vector */
+ u8 nv;
+ /* bit 287:280 - Reserved */
+ u8 rsvd_2;
+ /* bit 319:288 - Notification Destination */
+ u32 ndst;
+ };
+ u64 control;
+ };
+ u32 rsvd[6];
+} __aligned(64);
+
+static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+ return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_on(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
+void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
+void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
+int pi_pre_block(struct kvm_vcpu *vcpu);
+void pi_post_block(struct kvm_vcpu *vcpu);
+void pi_wakeup_handler(void);
+void __init pi_init_cpu(int cpu);
+bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
+int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
+ bool set);
+
+#endif /* __KVM_X86_VMX_POSTED_INTR_H */
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 7a3675fddec2..1472c6c376f7 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -138,6 +138,13 @@ static inline bool is_external_intr(u32 intr_info)
return is_intr_type(intr_info, INTR_TYPE_EXT_INTR);
}
+static inline bool is_exception_with_error_code(u32 intr_info)
+{
+ const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK;
+
+ return (intr_info & mask) == mask;
+}
+
enum vmcs_field_width {
VMCS_FIELD_WIDTH_U16 = 0,
VMCS_FIELD_WIDTH_U64 = 1,
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 799db084a336..90ad7a6246e3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -4,6 +4,7 @@
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
+#include <asm/segment.h>
#define WORD_SIZE (BITS_PER_LONG / 8)
@@ -294,3 +295,36 @@ SYM_FUNC_START(vmread_error_trampoline)
ret
SYM_FUNC_END(vmread_error_trampoline)
+
+SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
+ /*
+ * Unconditionally create a stack frame, getting the correct RSP on the
+ * stack (for x86-64) would take two instructions anyways, and RBP can
+ * be used to restore RSP to make objtool happy (see below).
+ */
+ push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+ /*
+ * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+ * creating the synthetic interrupt stack frame for the IRQ/NMI.
+ */
+ and $-16, %rsp
+ push $__KERNEL_DS
+ push %rbp
+#endif
+ pushf
+ push $__KERNEL_CS
+ CALL_NOSPEC _ASM_ARG1
+
+ /*
+ * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+ * the correct value. objtool doesn't know the callee will IRET and,
+ * without the explicit restore, thinks the stack is getting walloped.
+ * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+ */
+ mov %_ASM_BP, %_ASM_SP
+ pop %_ASM_BP
+ ret
+SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8646a797b7a8..47b8357b9751 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -13,7 +13,6 @@
* Yaniv Kamay <yaniv@qumranet.com>
*/
-#include <linux/frame.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/kernel.h>
@@ -22,6 +21,7 @@
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/mm.h>
+#include <linux/objtool.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/slab.h>
@@ -56,7 +56,6 @@
#include "lapic.h"
#include "mmu.h"
#include "nested.h"
-#include "ops.h"
#include "pmu.h"
#include "trace.h"
#include "vmcs.h"
@@ -129,6 +128,9 @@ static bool __read_mostly enable_preemption_timer = 1;
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif
+extern bool __read_mostly allow_smaller_maxphyaddr;
+module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
+
#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
#define KVM_VM_CR0_ALWAYS_ON \
@@ -146,8 +148,25 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
RTIT_STATUS_BYTECNT))
-#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
- (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
+/*
+ * List of MSRs that can be directly passed to the guest.
+ * In addition to these x2apic and PT MSRs are handled specially.
+ */
+static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
+ MSR_IA32_SPEC_CTRL,
+ MSR_IA32_PRED_CMD,
+ MSR_IA32_TSC,
+ MSR_FS_BASE,
+ MSR_GS_BASE,
+ MSR_KERNEL_GS_BASE,
+ MSR_IA32_SYSENTER_CS,
+ MSR_IA32_SYSENTER_ESP,
+ MSR_IA32_SYSENTER_EIP,
+ MSR_CORE_C1_RES,
+ MSR_CORE_C3_RESIDENCY,
+ MSR_CORE_C6_RESIDENCY,
+ MSR_CORE_C7_RESIDENCY,
+};
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
@@ -341,9 +360,8 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
};
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
-static bool guest_state_valid(struct kvm_vcpu *vcpu);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
u32 msr, int type);
void vmx_vmexit(void);
@@ -398,13 +416,6 @@ DEFINE_PER_CPU(struct vmcs *, current_vmcs);
*/
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-/*
- * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
- * can find which vCPU should be waken up.
- */
-static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
-static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
-
static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -447,9 +458,9 @@ static unsigned long host_idt_base;
* will emulate SYSCALL in legacy mode if the vendor string in guest
* CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
* support this emulation, IA32_STAR must always be included in
- * vmx_msr_index[], even in i386 builds.
+ * vmx_uret_msrs_list[], even in i386 builds.
*/
-const u32 vmx_msr_index[] = {
+static const u32 vmx_uret_msrs_list[] = {
#ifdef CONFIG_X86_64
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
@@ -623,36 +634,71 @@ static inline bool report_flexpriority(void)
return flexpriority_enabled;
}
-static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
+static int possible_passthrough_msr_slot(u32 msr)
+{
+ u32 i;
+
+ for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
+ if (vmx_possible_passthrough_msrs[i] == msr)
+ return i;
+
+ return -ENOENT;
+}
+
+static bool is_valid_passthrough_msr(u32 msr)
+{
+ bool r;
+
+ switch (msr) {
+ case 0x800 ... 0x8ff:
+ /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
+ return true;
+ case MSR_IA32_RTIT_STATUS:
+ case MSR_IA32_RTIT_OUTPUT_BASE:
+ case MSR_IA32_RTIT_OUTPUT_MASK:
+ case MSR_IA32_RTIT_CR3_MATCH:
+ case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
+ /* PT MSRs. These are handled in pt_update_intercept_for_msr() */
+ return true;
+ }
+
+ r = possible_passthrough_msr_slot(msr) != -ENOENT;
+
+ WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+
+ return r;
+}
+
+static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{
int i;
- for (i = 0; i < vmx->nmsrs; ++i)
- if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
+ for (i = 0; i < vmx->nr_uret_msrs; ++i)
+ if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
return i;
return -1;
}
-struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
+struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{
int i;
- i = __find_msr_index(vmx, msr);
+ i = __vmx_find_uret_msr(vmx, msr);
if (i >= 0)
- return &vmx->guest_msrs[i];
+ return &vmx->guest_uret_msrs[i];
return NULL;
}
-static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data)
+static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
+ struct vmx_uret_msr *msr, u64 data)
{
int ret = 0;
u64 old_msr_data = msr->data;
msr->data = data;
- if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
+ if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
preempt_disable();
- ret = kvm_set_shared_msr(msr->index, msr->data,
- msr->mask);
+ ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
preempt_enable();
if (ret)
msr->data = old_msr_data;
@@ -791,6 +837,18 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
+ else {
+ /*
+ * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
+ * between guest and host. In that case we only care about present
+ * faults. For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in
+ * prepare_vmcs02_rare.
+ */
+ bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR));
+ int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0;
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask);
+ }
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -825,7 +883,7 @@ static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
vm_exit_controls_clearbit(vmx, exit);
}
-int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
+int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr)
{
unsigned int i;
@@ -859,7 +917,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
}
break;
}
- i = vmx_find_msr_index(&m->guest, msr);
+ i = vmx_find_loadstore_msr_slot(&m->guest, msr);
if (i < 0)
goto skip_guest;
--m->guest.nr;
@@ -867,7 +925,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
skip_guest:
- i = vmx_find_msr_index(&m->host, msr);
+ i = vmx_find_loadstore_msr_slot(&m->host, msr);
if (i < 0)
return;
@@ -926,12 +984,12 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
- i = vmx_find_msr_index(&m->guest, msr);
+ i = vmx_find_loadstore_msr_slot(&m->guest, msr);
if (!entry_only)
- j = vmx_find_msr_index(&m->host, msr);
+ j = vmx_find_loadstore_msr_slot(&m->host, msr);
- if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
- (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) {
+ if ((i < 0 && m->guest.nr == MAX_NR_LOADSTORE_MSRS) ||
+ (j < 0 && m->host.nr == MAX_NR_LOADSTORE_MSRS)) {
printk_once(KERN_WARNING "Not enough msr switch entries. "
"Can't add msr %x\n", msr);
return;
@@ -954,10 +1012,11 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
m->host.val[j].value = host_val;
}
-static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
+static bool update_transition_efer(struct vcpu_vmx *vmx)
{
u64 guest_efer = vmx->vcpu.arch.efer;
u64 ignore_bits = 0;
+ int i;
/* Shadow paging assumes NX to be available. */
if (!enable_ept)
@@ -989,17 +1048,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
else
clear_atomic_switch_msr(vmx, MSR_EFER);
return false;
- } else {
- clear_atomic_switch_msr(vmx, MSR_EFER);
+ }
- guest_efer &= ~ignore_bits;
- guest_efer |= host_efer & ignore_bits;
+ i = __vmx_find_uret_msr(vmx, MSR_EFER);
+ if (i < 0)
+ return false;
- vmx->guest_msrs[efer_offset].data = guest_efer;
- vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+ clear_atomic_switch_msr(vmx, MSR_EFER);
- return true;
- }
+ guest_efer &= ~ignore_bits;
+ guest_efer |= host_efer & ignore_bits;
+
+ vmx->guest_uret_msrs[i].data = guest_efer;
+ vmx->guest_uret_msrs[i].mask = ~ignore_bits;
+
+ return true;
}
#ifdef CONFIG_X86_32
@@ -1037,6 +1100,12 @@ static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
!(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
}
+static inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base)
+{
+ /* The base must be 128-byte aligned and a legal physical address. */
+ return !kvm_vcpu_is_illegal_gpa(vcpu, base) && !(base & 0x7f);
+}
+
static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
{
u32 i;
@@ -1141,12 +1210,12 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* when guest state is loaded. This happens when guest transitions
* to/from long-mode by setting MSR_EFER.LMA.
*/
- if (!vmx->guest_msrs_ready) {
- vmx->guest_msrs_ready = true;
- for (i = 0; i < vmx->save_nmsrs; ++i)
- kvm_set_shared_msr(vmx->guest_msrs[i].index,
- vmx->guest_msrs[i].data,
- vmx->guest_msrs[i].mask);
+ if (!vmx->guest_uret_msrs_loaded) {
+ vmx->guest_uret_msrs_loaded = true;
+ for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
+ kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
+ vmx->guest_uret_msrs[i].data,
+ vmx->guest_uret_msrs[i].mask);
}
@@ -1230,7 +1299,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
#endif
load_fixmap_gdt(raw_smp_processor_id());
vmx->guest_state_loaded = false;
- vmx->guest_msrs_ready = false;
+ vmx->guest_uret_msrs_loaded = false;
}
#ifdef CONFIG_X86_64
@@ -1253,62 +1322,6 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
}
#endif
-static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
-
- /*
- * In case of hot-plug or hot-unplug, we may have to undo
- * vmx_vcpu_pi_put even if there is no assigned device. And we
- * always keep PI.NDST up to date for simplicity: it makes the
- * code easier, and CPU migration is not a fast path.
- */
- if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
- return;
-
- /*
- * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
- * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
- * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
- * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
- * correctly.
- */
- if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
- pi_clear_sn(pi_desc);
- goto after_clear_sn;
- }
-
- /* The full case. */
- do {
- old.control = new.control = pi_desc->control;
-
- dest = cpu_physical_id(cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- new.sn = 0;
- } while (cmpxchg64(&pi_desc->control, old.control,
- new.control) != old.control);
-
-after_clear_sn:
-
- /*
- * Clear SN before reading the bitmap. The VT-d firmware
- * writes the bitmap and reads SN atomically (5.2.3 in the
- * spec), so it doesn't really have a memory barrier that
- * pairs with this, but we cannot do that and we need one.
- */
- smp_mb__after_atomic();
-
- if (!pi_is_pir_empty(pi_desc))
- pi_set_on(pi_desc);
-}
-
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
struct loaded_vmcs *buddy)
{
@@ -1392,20 +1405,6 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmx->host_debugctlmsr = get_debugctlmsr();
}
-static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
- return;
-
- /* Set SN when the vCPU is preempted */
- if (vcpu->preempted)
- pi_set_sn(pi_desc);
-}
-
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
vmx_vcpu_pi_put(vcpu);
@@ -1415,7 +1414,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
static bool emulation_required(struct kvm_vcpu *vcpu)
{
- return emulate_invalid_guest_state && !guest_state_valid(vcpu);
+ return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
}
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
@@ -1441,7 +1440,7 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long old_rflags;
- if (enable_unrestricted_guest) {
+ if (is_unrestricted_guest(vcpu)) {
kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
vmx->rflags = rflags;
vmcs_writel(GUEST_RFLAGS, rflags);
@@ -1561,6 +1560,11 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
+static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
+{
+ return true;
+}
+
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
unsigned long rip, orig_rip;
@@ -1599,33 +1603,6 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
}
/*
- * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns
- * KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM. Return value
- * indicates whether exit to userspace is needed.
- */
-int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
- struct x86_exception *e)
-{
- if (r == X86EMUL_PROPAGATE_FAULT) {
- kvm_inject_emulated_page_fault(vcpu, e);
- return 1;
- }
-
- /*
- * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED
- * while handling a VMX instruction KVM could've handled the request
- * correctly by exiting to userspace and performing I/O but there
- * doesn't seem to be a real use-case behind such requests, just return
- * KVM_EXIT_INTERNAL_ERROR for now.
- */
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
-
- return 0;
-}
-
-/*
* Recognizes a pending MTF VM-exit and records the nested state for later
* delivery.
*/
@@ -1708,16 +1685,19 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
vmx_clear_hlt(vcpu);
}
-/*
- * Swap MSR entry in host/guest MSR entry array.
- */
-static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
{
- struct shared_msr_entry tmp;
+ struct vmx_uret_msr tmp;
+ int from, to;
+
+ from = __vmx_find_uret_msr(vmx, msr);
+ if (from < 0)
+ return;
+ to = vmx->nr_active_uret_msrs++;
- tmp = vmx->guest_msrs[to];
- vmx->guest_msrs[to] = vmx->guest_msrs[from];
- vmx->guest_msrs[from] = tmp;
+ tmp = vmx->guest_uret_msrs[to];
+ vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
+ vmx->guest_uret_msrs[from] = tmp;
}
/*
@@ -1727,38 +1707,26 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
*/
static void setup_msrs(struct vcpu_vmx *vmx)
{
- int save_nmsrs, index;
-
- save_nmsrs = 0;
+ vmx->guest_uret_msrs_loaded = false;
+ vmx->nr_active_uret_msrs = 0;
#ifdef CONFIG_X86_64
/*
* The SYSCALL MSRs are only needed on long mode guests, and only
* when EFER.SCE is set.
*/
if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
- index = __find_msr_index(vmx, MSR_STAR);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_LSTAR);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
+ vmx_setup_uret_msr(vmx, MSR_STAR);
+ vmx_setup_uret_msr(vmx, MSR_LSTAR);
+ vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
}
#endif
- index = __find_msr_index(vmx, MSR_EFER);
- if (index >= 0 && update_transition_efer(vmx, index))
- move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_TSC_AUX);
- if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
- move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
-
- vmx->save_nmsrs = save_nmsrs;
- vmx->guest_msrs_ready = false;
+ if (update_transition_efer(vmx))
+ vmx_setup_uret_msr(vmx, MSR_EFER);
+
+ if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
+ vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
+
+ vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
if (cpu_has_vmx_msr_bitmap())
vmx_update_msr_bitmap(&vmx->vcpu);
@@ -1828,7 +1796,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct shared_msr_entry *msr;
+ struct vmx_uret_msr *msr;
u32 index;
switch (msr_info->index) {
@@ -1849,7 +1817,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
return 1;
- goto find_shared_msr;
+ goto find_uret_msr;
case MSR_IA32_UMWAIT_CONTROL:
if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
return 1;
@@ -1956,10 +1924,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
return 1;
- goto find_shared_msr;
+ goto find_uret_msr;
default:
- find_shared_msr:
- msr = find_msr_entry(vmx, msr_info->index);
+ find_uret_msr:
+ msr = vmx_find_uret_msr(vmx, msr_info->index);
if (msr) {
msr_info->data = msr->data;
break;
@@ -1988,7 +1956,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct shared_msr_entry *msr;
+ struct vmx_uret_msr *msr;
int ret = 0;
u32 msr_index = msr_info->index;
u64 data = msr_info->data;
@@ -2082,7 +2050,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* in the merging. We update the vmcs01 here for L1 as well
* since it will end up touching the MSR anyway now.
*/
- vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+ vmx_disable_intercept_for_msr(vcpu,
MSR_IA32_SPEC_CTRL,
MSR_TYPE_RW);
break;
@@ -2092,7 +2060,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
return 1;
- goto find_shared_msr;
+ goto find_uret_msr;
case MSR_IA32_PRED_CMD:
if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
@@ -2118,8 +2086,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* vmcs02.msr_bitmap here since it gets completely overwritten
* in the merging.
*/
- vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
- MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W);
break;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
@@ -2169,7 +2136,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vmcs_write64(GUEST_IA32_RTIT_CTL, data);
vmx->pt_desc.guest.ctl = data;
- pt_update_intercept_for_msr(vmx);
+ pt_update_intercept_for_msr(vcpu);
break;
case MSR_IA32_RTIT_STATUS:
if (!pt_can_write_msr(vmx))
@@ -2194,7 +2161,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_single_range_output))
return 1;
- if (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)
+ if (!pt_output_base_valid(vcpu, data))
return 1;
vmx->pt_desc.guest.output_base = data;
break;
@@ -2229,13 +2196,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
return 1;
- goto find_shared_msr;
+ goto find_uret_msr;
default:
- find_shared_msr:
- msr = find_msr_entry(vmx, msr_index);
+ find_uret_msr:
+ msr = vmx_find_uret_msr(vmx, msr_index);
if (msr)
- ret = vmx_set_guest_msr(vmx, msr, data);
+ ret = vmx_set_guest_uret_msr(vmx, msr, data);
else
ret = kvm_set_msr_common(vcpu, msr_info);
}
@@ -2267,7 +2234,8 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
break;
case VCPU_EXREG_CR3:
- if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
+ if (is_unrestricted_guest(vcpu) ||
+ (enable_ept && is_paging(vcpu)))
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
break;
case VCPU_EXREG_CR4:
@@ -2448,7 +2416,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
SECONDARY_EXEC_DESC |
- SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_ENABLE_RDTSCP |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -2592,8 +2560,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
vmcs_conf->vmexit_ctrl = _vmexit_control;
vmcs_conf->vmentry_ctrl = _vmentry_control;
- if (static_branch_unlikely(&enable_evmcs))
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (enlightened_vmcs)
evmcs_sanitize_exec_ctrls(vmcs_conf);
+#endif
return 0;
}
@@ -2862,13 +2832,14 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
-void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+ struct vmx_uret_msr *msr = vmx_find_uret_msr(vmx, MSR_EFER);
+ /* Nothing to do if hardware doesn't support EFER. */
if (!msr)
- return;
+ return 0;
vcpu->arch.efer = efer;
if (efer & EFER_LMA) {
@@ -2880,6 +2851,7 @@ void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
msr->data = efer & ~EFER_LME;
}
setup_msrs(vmx);
+ return 0;
}
#ifdef CONFIG_X86_64
@@ -3033,7 +3005,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
unsigned long hw_cr0;
hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
- if (enable_unrestricted_guest)
+ if (is_unrestricted_guest(vcpu))
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
else {
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
@@ -3054,7 +3026,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
- if (enable_ept && !enable_unrestricted_guest)
+ if (enable_ept && !is_unrestricted_guest(vcpu))
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -3134,7 +3106,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
unsigned long hw_cr4;
hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
- if (enable_unrestricted_guest)
+ if (is_unrestricted_guest(vcpu))
hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
else if (vmx->rmode.vm86_active)
hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
@@ -3169,7 +3141,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vcpu->arch.cr4 = cr4;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
- if (!enable_unrestricted_guest) {
+ if (!is_unrestricted_guest(vcpu)) {
if (enable_ept) {
if (!is_paging(vcpu)) {
hw_cr4 &= ~X86_CR4_PAE;
@@ -3309,7 +3281,7 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
* tree. Newer qemu binaries with that qemu fix would not need this
* kvm hack.
*/
- if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
+ if (is_unrestricted_guest(vcpu) && (seg != VCPU_SREG_LDTR))
var->type |= 0x1; /* Accessed */
vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
@@ -3498,11 +3470,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
* not.
* We assume that registers are always usable
*/
-static bool guest_state_valid(struct kvm_vcpu *vcpu)
+bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu)
{
- if (enable_unrestricted_guest)
- return true;
-
/* real mode guest state checks */
if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
@@ -3688,11 +3657,52 @@ void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
+static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+{
+ int f = sizeof(unsigned long);
+
+ if (msr <= 0x1fff)
+ __clear_bit(msr, msr_bitmap + 0x000 / f);
+ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+ __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+}
+
+static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+{
+ int f = sizeof(unsigned long);
+
+ if (msr <= 0x1fff)
+ __clear_bit(msr, msr_bitmap + 0x800 / f);
+ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+ __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+}
+
+static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+{
+ int f = sizeof(unsigned long);
+
+ if (msr <= 0x1fff)
+ __set_bit(msr, msr_bitmap + 0x000 / f);
+ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+ __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+}
+
+static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
{
int f = sizeof(unsigned long);
+ if (msr <= 0x1fff)
+ __set_bit(msr, msr_bitmap + 0x800 / f);
+ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+ __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+}
+
+static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
+ u32 msr, int type)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -3700,36 +3710,44 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
evmcs_touch_msr_bitmap();
/*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __clear_bit(msr, msr_bitmap + 0x000 / f);
+ * Mark the desired intercept state in shadow bitmap, this is needed
+ * for resync when the MSR filters change.
+ */
+ if (is_valid_passthrough_msr(msr)) {
+ int idx = possible_passthrough_msr_slot(msr);
+
+ if (idx != -ENOENT) {
+ if (type & MSR_TYPE_R)
+ clear_bit(idx, vmx->shadow_msr_intercept.read);
+ if (type & MSR_TYPE_W)
+ clear_bit(idx, vmx->shadow_msr_intercept.write);
+ }
+ }
- if (type & MSR_TYPE_W)
- /* write-low */
- __clear_bit(msr, msr_bitmap + 0x800 / f);
+ if ((type & MSR_TYPE_R) &&
+ !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
+ vmx_set_msr_bitmap_read(msr_bitmap, msr);
+ type &= ~MSR_TYPE_R;
+ }
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __clear_bit(msr, msr_bitmap + 0x400 / f);
+ if ((type & MSR_TYPE_W) &&
+ !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_WRITE)) {
+ vmx_set_msr_bitmap_write(msr_bitmap, msr);
+ type &= ~MSR_TYPE_W;
+ }
- if (type & MSR_TYPE_W)
- /* write-high */
- __clear_bit(msr, msr_bitmap + 0xc00 / f);
+ if (type & MSR_TYPE_R)
+ vmx_clear_msr_bitmap_read(msr_bitmap, msr);
- }
+ if (type & MSR_TYPE_W)
+ vmx_clear_msr_bitmap_write(msr_bitmap, msr);
}
-static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
u32 msr, int type)
{
- int f = sizeof(unsigned long);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -3738,39 +3756,34 @@ static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitm
evmcs_touch_msr_bitmap();
/*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __set_bit(msr, msr_bitmap + 0x000 / f);
-
- if (type & MSR_TYPE_W)
- /* write-low */
- __set_bit(msr, msr_bitmap + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __set_bit(msr, msr_bitmap + 0x400 / f);
+ * Mark the desired intercept state in shadow bitmap, this is needed
+ * for resync when the MSR filter changes.
+ */
+ if (is_valid_passthrough_msr(msr)) {
+ int idx = possible_passthrough_msr_slot(msr);
+
+ if (idx != -ENOENT) {
+ if (type & MSR_TYPE_R)
+ set_bit(idx, vmx->shadow_msr_intercept.read);
+ if (type & MSR_TYPE_W)
+ set_bit(idx, vmx->shadow_msr_intercept.write);
+ }
+ }
- if (type & MSR_TYPE_W)
- /* write-high */
- __set_bit(msr, msr_bitmap + 0xc00 / f);
+ if (type & MSR_TYPE_R)
+ vmx_set_msr_bitmap_read(msr_bitmap, msr);
- }
+ if (type & MSR_TYPE_W)
+ vmx_set_msr_bitmap_write(msr_bitmap, msr);
}
-static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type, bool value)
+static __always_inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
+ u32 msr, int type, bool value)
{
if (value)
- vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
+ vmx_enable_intercept_for_msr(vcpu, msr, type);
else
- vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
+ vmx_disable_intercept_for_msr(vcpu, msr, type);
}
static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
@@ -3788,35 +3801,47 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
return mode;
}
-static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
- u8 mode)
+static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
{
+ unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
+ unsigned long read_intercept;
int msr;
+ read_intercept = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
+
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
- unsigned word = msr / BITS_PER_LONG;
- msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
- msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
+ unsigned int read_idx = msr / BITS_PER_LONG;
+ unsigned int write_idx = read_idx + (0x800 / sizeof(long));
+
+ msr_bitmap[read_idx] = read_intercept;
+ msr_bitmap[write_idx] = ~0ul;
}
+}
- if (mode & MSR_BITMAP_MODE_X2APIC) {
- /*
- * TPR reads and writes can be virtualized even if virtual interrupt
- * delivery is not in use.
- */
- vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
- if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
- vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
- vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
- vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
- }
+static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode)
+{
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+ vmx_reset_x2apic_msrs(vcpu, mode);
+
+ /*
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
+ */
+ vmx_set_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW,
+ !(mode & MSR_BITMAP_MODE_X2APIC));
+
+ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
+ vmx_enable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(vcpu, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
}
}
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
u8 mode = vmx_msr_bitmap_mode(vcpu);
u8 changed = mode ^ vmx->msr_bitmap_mode;
@@ -3824,30 +3849,24 @@ void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
return;
if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
- vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
+ vmx_update_msr_bitmap_x2apic(vcpu, mode);
vmx->msr_bitmap_mode = mode;
}
-void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
+void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
{
- unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
u32 i;
- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS,
- MSR_TYPE_RW, flag);
- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE,
- MSR_TYPE_RW, flag);
- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK,
- MSR_TYPE_RW, flag);
- vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH,
- MSR_TYPE_RW, flag);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_STATUS, MSR_TYPE_RW, flag);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_BASE, MSR_TYPE_RW, flag);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_OUTPUT_MASK, MSR_TYPE_RW, flag);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_CR3_MATCH, MSR_TYPE_RW, flag);
for (i = 0; i < vmx->pt_desc.addr_range; i++) {
- vmx_set_intercept_for_msr(msr_bitmap,
- MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
- vmx_set_intercept_for_msr(msr_bitmap,
- MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
}
}
@@ -3871,6 +3890,29 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
return ((rvi & 0xf0) > (vppr & 0xf0));
}
+static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 i;
+
+ /*
+ * Set intercept permissions for all potentially passed through MSRs
+ * again. They will automatically get filtered through the MSR filter,
+ * so we are back in sync after this.
+ */
+ for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+ u32 msr = vmx_possible_passthrough_msrs[i];
+ bool read = test_bit(i, vmx->shadow_msr_intercept.read);
+ bool write = test_bit(i, vmx->shadow_msr_intercept.write);
+
+ vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_R, read);
+ vmx_set_intercept_for_msr(vcpu, msr, MSR_TYPE_W, write);
+ }
+
+ pt_update_intercept_for_msr(vcpu);
+ vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu));
+}
+
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
bool nested)
{
@@ -4028,13 +4070,16 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
{
- vmx->vcpu.arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS;
+ struct kvm_vcpu *vcpu = &vmx->vcpu;
+
+ vcpu->arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS &
+ ~vcpu->arch.cr4_guest_rsvd_bits;
if (!enable_ept)
- vmx->vcpu.arch.cr4_guest_owned_bits &= ~X86_CR4_PGE;
+ vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PGE;
if (is_guest_mode(&vmx->vcpu))
- vmx->vcpu.arch.cr4_guest_owned_bits &=
- ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
- vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
+ vcpu->arch.cr4_guest_owned_bits &=
+ ~get_vmcs12(vcpu)->cr4_guest_host_mask;
+ vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits);
}
u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
@@ -4099,6 +4144,61 @@ u32 vmx_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}
+/*
+ * Adjust a single secondary execution control bit to intercept/allow an
+ * instruction in the guest. This is usually done based on whether or not a
+ * feature has been exposed to the guest in order to correctly emulate faults.
+ */
+static inline void
+vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
+ u32 control, bool enabled, bool exiting)
+{
+ /*
+ * If the control is for an opt-in feature, clear the control if the
+ * feature is not exposed to the guest, i.e. not enabled. If the
+ * control is opt-out, i.e. an exiting control, clear the control if
+ * the feature _is_ exposed to the guest, i.e. exiting/interception is
+ * disabled for the associated instruction. Note, the caller is
+ * responsible presetting exec_control to set all supported bits.
+ */
+ if (enabled == exiting)
+ *exec_control &= ~control;
+
+ /*
+ * Update the nested MSR settings so that a nested VMM can/can't set
+ * controls for features that are/aren't exposed to the guest.
+ */
+ if (nested) {
+ if (enabled)
+ vmx->nested.msrs.secondary_ctls_high |= control;
+ else
+ vmx->nested.msrs.secondary_ctls_high &= ~control;
+ }
+}
+
+/*
+ * Wrapper macro for the common case of adjusting a secondary execution control
+ * based on a single guest CPUID bit, with a dedicated feature bit. This also
+ * verifies that the control is actually supported by KVM and hardware.
+ */
+#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
+({ \
+ bool __enabled; \
+ \
+ if (cpu_has_vmx_##name()) { \
+ __enabled = guest_cpuid_has(&(vmx)->vcpu, \
+ X86_FEATURE_##feat_name); \
+ vmx_adjust_secondary_exec_control(vmx, exec_control, \
+ SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
+ } \
+})
+
+/* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
+#define vmx_adjust_sec_exec_feature(vmx, exec_control, lname, uname) \
+ vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, ENABLE_##uname, false)
+
+#define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \
+ vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true)
static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
{
@@ -4139,7 +4239,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_pml)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
- if (vmx_xsaves_supported()) {
+ if (cpu_has_vmx_xsaves()) {
/* Exposing XSAVES only when XSAVE is exposed */
bool xsaves_enabled =
boot_cpu_has(X86_FEATURE_XSAVE) &&
@@ -4148,101 +4248,29 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
vcpu->arch.xsaves_enabled = xsaves_enabled;
- if (!xsaves_enabled)
- exec_control &= ~SECONDARY_EXEC_XSAVES;
-
- if (nested) {
- if (xsaves_enabled)
- vmx->nested.msrs.secondary_ctls_high |=
- SECONDARY_EXEC_XSAVES;
- else
- vmx->nested.msrs.secondary_ctls_high &=
- ~SECONDARY_EXEC_XSAVES;
- }
+ vmx_adjust_secondary_exec_control(vmx, &exec_control,
+ SECONDARY_EXEC_XSAVES,
+ xsaves_enabled, false);
}
- if (cpu_has_vmx_rdtscp()) {
- bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
- if (!rdtscp_enabled)
- exec_control &= ~SECONDARY_EXEC_RDTSCP;
-
- if (nested) {
- if (rdtscp_enabled)
- vmx->nested.msrs.secondary_ctls_high |=
- SECONDARY_EXEC_RDTSCP;
- else
- vmx->nested.msrs.secondary_ctls_high &=
- ~SECONDARY_EXEC_RDTSCP;
- }
- }
-
- if (cpu_has_vmx_invpcid()) {
- /* Exposing INVPCID only when PCID is exposed */
- bool invpcid_enabled =
- guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
- guest_cpuid_has(vcpu, X86_FEATURE_PCID);
-
- if (!invpcid_enabled) {
- exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
- guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
- }
+ vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
- if (nested) {
- if (invpcid_enabled)
- vmx->nested.msrs.secondary_ctls_high |=
- SECONDARY_EXEC_ENABLE_INVPCID;
- else
- vmx->nested.msrs.secondary_ctls_high &=
- ~SECONDARY_EXEC_ENABLE_INVPCID;
- }
- }
-
- if (vmx_rdrand_supported()) {
- bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
- if (rdrand_enabled)
- exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
-
- if (nested) {
- if (rdrand_enabled)
- vmx->nested.msrs.secondary_ctls_high |=
- SECONDARY_EXEC_RDRAND_EXITING;
- else
- vmx->nested.msrs.secondary_ctls_high &=
- ~SECONDARY_EXEC_RDRAND_EXITING;
- }
- }
-
- if (vmx_rdseed_supported()) {
- bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
- if (rdseed_enabled)
- exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
-
- if (nested) {
- if (rdseed_enabled)
- vmx->nested.msrs.secondary_ctls_high |=
- SECONDARY_EXEC_RDSEED_EXITING;
- else
- vmx->nested.msrs.secondary_ctls_high &=
- ~SECONDARY_EXEC_RDSEED_EXITING;
- }
- }
+ /*
+ * Expose INVPCID if and only if PCID is also exposed to the guest.
+ * INVPCID takes a #UD when it's disabled in the VMCS, but a #GP or #PF
+ * if CR4.PCIDE=0. Enumerating CPUID.INVPCID=1 would lead to incorrect
+ * behavior from the guest perspective (it would expect #GP or #PF).
+ */
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
+ guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
+ vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
- if (vmx_waitpkg_supported()) {
- bool waitpkg_enabled =
- guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
- if (!waitpkg_enabled)
- exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+ vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
+ vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);
- if (nested) {
- if (waitpkg_enabled)
- vmx->nested.msrs.secondary_ctls_high |=
- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
- else
- vmx->nested.msrs.secondary_ctls_high &=
- ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
- }
- }
+ vmx_adjust_sec_exec_control(vmx, &exec_control, waitpkg, WAITPKG,
+ ENABLE_USR_WAIT_PAUSE, false);
vmx->secondary_exec_control = exec_control;
}
@@ -4335,7 +4363,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
if (vmx->vpid != 0)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
- if (vmx_xsaves_supported())
+ if (cpu_has_vmx_xsaves())
vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
if (enable_pml) {
@@ -4352,16 +4380,6 @@ static void init_vmcs(struct vcpu_vmx *vmx)
vmx->pt_desc.guest.output_mask = 0x7F;
vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
}
-
- /*
- * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
- * between guest and host. In that case we only care about present
- * faults.
- */
- if (enable_ept) {
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, PFERR_PRESENT_MASK);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, PFERR_PRESENT_MASK);
- }
}
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -4803,6 +4821,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
* EPT will cause page fault only if we need to
* detect illegal GPAs.
*/
+ WARN_ON_ONCE(!allow_smaller_maxphyaddr);
kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
return 1;
} else
@@ -5148,7 +5167,8 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
static int handle_invd(struct kvm_vcpu *vcpu)
{
- return kvm_emulate_instruction(vcpu, 0);
+ /* Treat an INVD instruction as a NOP and just skip it. */
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -5331,7 +5351,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* would also use advanced VM-exit information for EPT violations to
* reconstruct the page fault error code.
*/
- if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa)))
+ if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
return kvm_emulate_instruction(vcpu, 0);
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -5442,25 +5462,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
}
-/*
- * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
- */
-static void wakeup_handler(void)
-{
- struct kvm_vcpu *vcpu;
- int cpu = smp_processor_id();
-
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
- list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
- blocked_vcpu_list) {
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (pi_test_on(pi_desc) == 1)
- kvm_vcpu_kick(vcpu);
- }
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
-}
-
static void vmx_enable_tdp(void)
{
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
@@ -5524,16 +5525,11 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
{
u32 vmx_instruction_info;
unsigned long type;
- bool pcid_enabled;
gva_t gva;
- struct x86_exception e;
- unsigned i;
- unsigned long roots_to_free = 0;
struct {
u64 pcid;
u64 gla;
} operand;
- int r;
if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5556,68 +5552,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
sizeof(operand), &gva))
return 1;
- r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
- if (r != X86EMUL_CONTINUE)
- return vmx_handle_memory_failure(vcpu, r, &e);
-
- if (operand.pcid >> 12 != 0) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
- pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
-
- switch (type) {
- case INVPCID_TYPE_INDIV_ADDR:
- if ((!pcid_enabled && (operand.pcid != 0)) ||
- is_noncanonical_address(operand.gla, vcpu)) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
- kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
- return kvm_skip_emulated_instruction(vcpu);
-
- case INVPCID_TYPE_SINGLE_CTXT:
- if (!pcid_enabled && (operand.pcid != 0)) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
- if (kvm_get_active_pcid(vcpu) == operand.pcid) {
- kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
- }
-
- for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
- == operand.pcid)
- roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
-
- kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
- /*
- * If neither the current cr3 nor any of the prev_roots use the
- * given PCID, then nothing needs to be done here because a
- * resync will happen anyway before switching to any other CR3.
- */
-
- return kvm_skip_emulated_instruction(vcpu);
-
- case INVPCID_TYPE_ALL_NON_GLOBAL:
- /*
- * Currently, KVM doesn't mark global entries in the shadow
- * page tables, so a non-global flush just degenerates to a
- * global flush. If needed, we could optimize this later by
- * keeping track of global entries in shadow page tables.
- */
-
- fallthrough;
- case INVPCID_TYPE_ALL_INCL_GLOBAL:
- kvm_mmu_unload(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
-
- default:
- BUG(); /* We have already checked above that type <= 3 */
- }
+ return kvm_handle_invpcid(vcpu, type, gva);
}
static int handle_pml_full(struct kvm_vcpu *vcpu)
@@ -5746,10 +5681,24 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
static const int kvm_vmx_max_exit_handlers =
ARRAY_SIZE(kvm_vmx_exit_handlers);
-static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+ u32 *intr_info, u32 *error_code)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
*info1 = vmx_get_exit_qual(vcpu);
- *info2 = vmx_get_intr_info(vcpu);
+ if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+ *info2 = vmx->idt_vectoring_info;
+ *intr_info = vmx_get_intr_info(vcpu);
+ if (is_exception_with_error_code(*intr_info))
+ *error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ else
+ *error_code = 0;
+ } else {
+ *info2 = 0;
+ *intr_info = 0;
+ *error_code = 0;
+ }
}
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
@@ -6383,14 +6332,6 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return max_irr;
}
-static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- return pi_test_on(pi_desc) ||
- (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
-}
-
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
if (!kvm_vcpu_apicv_active(vcpu))
@@ -6410,70 +6351,43 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}
+void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
+
+static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
+{
+ unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
+ gate_desc *desc = (gate_desc *)host_idt_base + vector;
+
+ kvm_before_interrupt(vcpu);
+ vmx_do_interrupt_nmi_irqoff(gate_offset(desc));
+ kvm_after_interrupt(vcpu);
+}
+
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
/* if exit due to PF check for async PF */
- if (is_page_fault(intr_info)) {
+ if (is_page_fault(intr_info))
vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
/* Handle machine checks before interrupts are enabled */
- } else if (is_machine_check(intr_info)) {
+ else if (is_machine_check(intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- } else if (is_nmi(intr_info)) {
- kvm_before_interrupt(&vmx->vcpu);
- asm("int $2");
- kvm_after_interrupt(&vmx->vcpu);
- }
+ else if (is_nmi(intr_info))
+ handle_interrupt_nmi_irqoff(&vmx->vcpu, intr_info);
}
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
- unsigned int vector;
- unsigned long entry;
-#ifdef CONFIG_X86_64
- unsigned long tmp;
-#endif
- gate_desc *desc;
u32 intr_info = vmx_get_intr_info(vcpu);
if (WARN_ONCE(!is_external_intr(intr_info),
"KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
- vector = intr_info & INTR_INFO_VECTOR_MASK;
- desc = (gate_desc *)host_idt_base + vector;
- entry = gate_offset(desc);
-
- kvm_before_interrupt(vcpu);
-
- asm volatile(
-#ifdef CONFIG_X86_64
- "mov %%rsp, %[sp]\n\t"
- "and $-16, %%rsp\n\t"
- "push %[ss]\n\t"
- "push %[sp]\n\t"
-#endif
- "pushf\n\t"
- "push %[cs]\n\t"
- CALL_NOSPEC
- :
-#ifdef CONFIG_X86_64
- [sp]"=&r"(tmp),
-#endif
- ASM_CALL_CONSTRAINT
- :
- [thunk_target]"r"(entry),
-#ifdef CONFIG_X86_64
- [ss]"i"(__KERNEL_DS),
-#endif
- [cs]"i"(__KERNEL_CS)
- );
-
- kvm_after_interrupt(vcpu);
+ handle_interrupt_nmi_irqoff(vcpu, intr_info);
}
-STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
@@ -6800,9 +6714,7 @@ reenter_guest:
if (enable_preemption_timer)
vmx_update_hv_timer(vcpu);
- if (lapic_in_kernel(vcpu) &&
- vcpu->arch.apic->lapic_timer.timer_advance_ns)
- kvm_wait_lapic_expire(vcpu);
+ kvm_wait_lapic_expire(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -6924,7 +6836,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx;
- unsigned long *msr_bitmap;
int i, cpu, err;
BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
@@ -6946,20 +6857,20 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
goto free_vpid;
}
- BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
+ BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
- u32 index = vmx_msr_index[i];
+ for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
+ u32 index = vmx_uret_msrs_list[i];
u32 data_low, data_high;
- int j = vmx->nmsrs;
+ int j = vmx->nr_uret_msrs;
if (rdmsr_safe(index, &data_low, &data_high) < 0)
continue;
if (wrmsr_safe(index, data_low, data_high) < 0)
continue;
- vmx->guest_msrs[j].index = i;
- vmx->guest_msrs[j].data = 0;
+ vmx->guest_uret_msrs[j].slot = i;
+ vmx->guest_uret_msrs[j].data = 0;
switch (index) {
case MSR_IA32_TSX_CTRL:
/*
@@ -6967,32 +6878,35 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
* let's avoid changing CPUID bits under the host
* kernel's feet.
*/
- vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+ vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
break;
default:
- vmx->guest_msrs[j].mask = -1ull;
+ vmx->guest_uret_msrs[j].mask = -1ull;
break;
}
- ++vmx->nmsrs;
+ ++vmx->nr_uret_msrs;
}
err = alloc_loaded_vmcs(&vmx->vmcs01);
if (err < 0)
goto free_pml;
- msr_bitmap = vmx->vmcs01.msr_bitmap;
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+ /* The MSR bitmap starts with all ones */
+ bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+ bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
if (kvm_cstate_in_guest(vcpu->kvm)) {
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
- vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
}
vmx->msr_bitmap_mode = 0;
@@ -7016,8 +6930,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
}
if (nested)
- nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept);
+ memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@@ -7337,13 +7250,18 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
update_intel_pt_cfg(vcpu);
if (boot_cpu_has(X86_FEATURE_RTM)) {
- struct shared_msr_entry *msr;
- msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL);
+ struct vmx_uret_msr *msr;
+ msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
if (msr) {
bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
- vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
+ vmx_set_guest_uret_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
}
}
+
+ set_cr4_guest_host_mask(vmx);
+
+ /* Refresh #PF interception to account for MAXPHYADDR changes. */
+ update_exception_bitmap(vcpu);
}
static __init void vmx_set_cpu_caps(void)
@@ -7367,14 +7285,14 @@ static __init void vmx_set_cpu_caps(void)
/* CPUID 0xD.1 */
supported_xss = 0;
- if (!vmx_xsaves_supported())
+ if (!cpu_has_vmx_xsaves())
kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
/* CPUID 0x80000001 */
if (!cpu_has_vmx_rdtscp())
kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
- if (vmx_waitpkg_supported())
+ if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
@@ -7430,7 +7348,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
* Because it is marked as EmulateOnUD, we need to intercept it here.
*/
case x86_intercept_rdtscp:
- if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
exception->vector = UD_VECTOR;
exception->error_code_valid = false;
return X86EMUL_PROPAGATE_FAULT;
@@ -7562,107 +7480,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
-static void __pi_post_block(struct kvm_vcpu *vcpu)
-{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
-
- do {
- old.control = new.control = pi_desc->control;
- WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
- "Wakeup handler not enabled while the VCPU is blocked\n");
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg64(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- vcpu->pre_pcpu = -1;
- }
-}
-
-/*
- * This routine does the following things for vCPU which is going
- * to be blocked if VT-d PI is enabled.
- * - Store the vCPU to the wakeup list, so when interrupts happen
- * we can find the right vCPU to wake up.
- * - Change the Posted-interrupt descriptor as below:
- * 'NDST' <-- vcpu->pre_pcpu
- * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
- * - If 'ON' is set during this process, which means at least one
- * interrupt is posted for this vCPU, we cannot block it, in
- * this case, return 1, otherwise, return 0.
- *
- */
-static int pi_pre_block(struct kvm_vcpu *vcpu)
-{
- unsigned int dest;
- struct pi_desc old, new;
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
- return 0;
-
- WARN_ON(irqs_disabled());
- local_irq_disable();
- if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
- }
-
- do {
- old.control = new.control = pi_desc->control;
-
- WARN((pi_desc->sn == 1),
- "Warning: SN field of posted-interrupts "
- "is set before blocking\n");
-
- /*
- * Since vCPU can be preempted during this process,
- * vcpu->cpu could be different with pre_pcpu, we
- * need to set pre_pcpu as the destination of wakeup
- * notification event, then we can find the right vCPU
- * to wakeup in wakeup handler if interrupts happen
- * when the vCPU is in blocked state.
- */
- dest = cpu_physical_id(vcpu->pre_pcpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* set 'NV' to 'wakeup vector' */
- new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg64(&pi_desc->control, old.control,
- new.control) != old.control);
-
- /* We should not block the vCPU if an interrupt is posted for it. */
- if (pi_test_on(pi_desc) == 1)
- __pi_post_block(vcpu);
-
- local_irq_enable();
- return (vcpu->pre_pcpu == -1);
-}
-
static int vmx_pre_block(struct kvm_vcpu *vcpu)
{
if (pi_pre_block(vcpu))
@@ -7674,17 +7491,6 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
return 0;
}
-static void pi_post_block(struct kvm_vcpu *vcpu)
-{
- if (vcpu->pre_pcpu == -1)
- return;
-
- WARN_ON(irqs_disabled());
- local_irq_disable();
- __pi_post_block(vcpu);
- local_irq_enable();
-}
-
static void vmx_post_block(struct kvm_vcpu *vcpu)
{
if (kvm_x86_ops.set_hv_timer)
@@ -7693,100 +7499,6 @@ static void vmx_post_block(struct kvm_vcpu *vcpu)
pi_post_block(vcpu);
}
-/*
- * vmx_update_pi_irte - set IRTE for Posted-Interrupts
- *
- * @kvm: kvm
- * @host_irq: host irq of the interrupt
- * @guest_irq: gsi of the interrupt
- * @set: set or unset PI
- * returns 0 on success, < 0 on failure
- */
-static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
- uint32_t guest_irq, bool set)
-{
- struct kvm_kernel_irq_routing_entry *e;
- struct kvm_irq_routing_table *irq_rt;
- struct kvm_lapic_irq irq;
- struct kvm_vcpu *vcpu;
- struct vcpu_data vcpu_info;
- int idx, ret = 0;
-
- if (!kvm_arch_has_assigned_device(kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(kvm->vcpus[0]))
- return 0;
-
- idx = srcu_read_lock(&kvm->irq_srcu);
- irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
- if (guest_irq >= irq_rt->nr_rt_entries ||
- hlist_empty(&irq_rt->map[guest_irq])) {
- pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
- guest_irq, irq_rt->nr_rt_entries);
- goto out;
- }
-
- hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
- if (e->type != KVM_IRQ_ROUTING_MSI)
- continue;
- /*
- * VT-d PI cannot support posting multicast/broadcast
- * interrupts to a vCPU, we still use interrupt remapping
- * for these kind of interrupts.
- *
- * For lowest-priority interrupts, we only support
- * those with single CPU as the destination, e.g. user
- * configures the interrupts via /proc/irq or uses
- * irqbalance to make the interrupts single-CPU.
- *
- * We will support full lowest-priority interrupt later.
- *
- * In addition, we can only inject generic interrupts using
- * the PI mechanism, refuse to route others through it.
- */
-
- kvm_set_msi_irq(kvm, e, &irq);
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
- !kvm_irq_is_postable(&irq)) {
- /*
- * Make sure the IRTE is in remapped mode if
- * we don't handle it in posted mode.
- */
- ret = irq_set_vcpu_affinity(host_irq, NULL);
- if (ret < 0) {
- printk(KERN_INFO
- "failed to back to remapped mode, irq: %u\n",
- host_irq);
- goto out;
- }
-
- continue;
- }
-
- vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
- vcpu_info.vector = irq.vector;
-
- trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
- vcpu_info.vector, vcpu_info.pi_desc_addr, set);
-
- if (set)
- ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else
- ret = irq_set_vcpu_affinity(host_irq, NULL);
-
- if (ret < 0) {
- printk(KERN_INFO "%s: failed to update PI IRTE\n",
- __func__);
- goto out;
- }
- }
-
- ret = 0;
-out:
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
static void vmx_setup_mce(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.mcg_cap & MCG_LMCE_P)
@@ -7844,11 +7556,6 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
/* RSM will cause a vmexit anyway. */
}
-static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
-{
- return false;
-}
-
static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{
return to_vmx(vcpu)->nested.vmxon;
@@ -7955,7 +7662,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
- .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
+ .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
.set_tss_addr = vmx_set_tss_addr,
.set_identity_map_addr = vmx_set_identity_map_addr,
@@ -7989,7 +7696,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.pmu_ops = &intel_pmu_ops,
.nested_ops = &vmx_nested_ops,
- .update_pi_irte = vmx_update_pi_irte,
+ .update_pi_irte = pi_update_irte,
#ifdef CONFIG_X86_64
.set_hv_timer = vmx_set_hv_timer,
@@ -8003,9 +7710,11 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.pre_leave_smm = vmx_pre_leave_smm,
.enable_smi_window = enable_smi_window,
- .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
+ .can_emulate_instruction = vmx_can_emulate_instruction,
.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
.migrate_timers = vmx_migrate_timers,
+
+ .msr_filter_changed = vmx_msr_filter_changed,
};
static __init int hardware_setup(void)
@@ -8017,8 +7726,8 @@ static __init int hardware_setup(void)
store_idt(&dt);
host_idt_base = dt.address;
- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
- kvm_define_shared_msr(i, vmx_msr_index[i]);
+ for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
+ kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
return -EIO;
@@ -8155,7 +7864,7 @@ static __init int hardware_setup(void)
vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
}
- kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+ kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
kvm_mce_cap_supported |= MCG_LMCE_P;
@@ -8294,8 +8003,8 @@ static int __init vmx_init(void)
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
- INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
- spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+
+ pi_init_cpu(cpu);
}
#ifdef CONFIG_KEXEC_CORE
@@ -8305,11 +8014,12 @@ static int __init vmx_init(void)
vmx_check_vmcs12_offsets();
/*
- * Intel processors don't have problems with
- * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable
- * it for VMX by default
+ * Shadow paging doesn't have a (further) performance penalty
+ * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
+ * by default
*/
- allow_smaller_maxphyaddr = true;
+ if (!enable_ept)
+ allow_smaller_maxphyaddr = true;
return 0;
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a2f82127c170..f6f66e5c6510 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -9,8 +9,9 @@
#include "capabilities.h"
#include "kvm_cache_regs.h"
-#include "ops.h"
+#include "posted_intr.h"
#include "vmcs.h"
+#include "vmx_ops.h"
#include "cpuid.h"
extern const u32 vmx_msr_index[];
@@ -22,20 +23,20 @@ extern const u32 vmx_msr_index[];
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
#ifdef CONFIG_X86_64
-#define NR_SHARED_MSRS 7
+#define MAX_NR_USER_RETURN_MSRS 7
#else
-#define NR_SHARED_MSRS 4
+#define MAX_NR_USER_RETURN_MSRS 4
#endif
-#define NR_LOADSTORE_MSRS 8
+#define MAX_NR_LOADSTORE_MSRS 8
struct vmx_msrs {
unsigned int nr;
- struct vmx_msr_entry val[NR_LOADSTORE_MSRS];
+ struct vmx_msr_entry val[MAX_NR_LOADSTORE_MSRS];
};
-struct shared_msr_entry {
- unsigned index;
+struct vmx_uret_msr {
+ unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
u64 data;
u64 mask;
};
@@ -49,29 +50,6 @@ enum segment_cache_field {
SEG_FIELD_NR = 4
};
-/* Posted-Interrupt Descriptor */
-struct pi_desc {
- u32 pir[8]; /* Posted interrupt requested */
- union {
- struct {
- /* bit 256 - Outstanding Notification */
- u16 on : 1,
- /* bit 257 - Suppress Notification */
- sn : 1,
- /* bit 271:258 - Reserved */
- rsvd_1 : 14;
- /* bit 279:272 - Notification Vector */
- u8 nv;
- /* bit 287:280 - Reserved */
- u8 rsvd_2;
- /* bit 319:288 - Notification Destination */
- u32 ndst;
- };
- u64 control;
- };
- u32 rsvd[6];
-} __aligned(64);
-
#define RTIT_ADDR_RANGE 4
struct pt_ctx {
@@ -218,10 +196,10 @@ struct vcpu_vmx {
u32 idt_vectoring_info;
ulong rflags;
- struct shared_msr_entry guest_msrs[NR_SHARED_MSRS];
- int nmsrs;
- int save_nmsrs;
- bool guest_msrs_ready;
+ struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
+ int nr_uret_msrs;
+ int nr_active_uret_msrs;
+ bool guest_uret_msrs_loaded;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
@@ -301,6 +279,13 @@ struct vcpu_vmx {
u64 ept_pointer;
struct pt_desc pt_desc;
+
+ /* Save desired MSR intercept (read: pass-through) state */
+#define MAX_POSSIBLE_PASSTHROUGH_MSRS 13
+ struct {
+ DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+ DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
+ } shadow_msr_intercept;
};
enum ept_pointers_status {
@@ -334,7 +319,7 @@ unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
-void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
+int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
@@ -343,6 +328,7 @@ void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa,
int root_level);
+
void update_exception_bitmap(struct kvm_vcpu *vcpu);
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
@@ -350,75 +336,12 @@ bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu);
bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
-struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr);
-void pt_update_intercept_for_msr(struct vcpu_vmx *vmx);
+struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
+void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
-int vmx_find_msr_index(struct vmx_msrs *m, u32 msr);
-int vmx_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
- struct x86_exception *e);
+int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
-#define POSTED_INTR_ON 0
-#define POSTED_INTR_SN 1
-
-static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
-{
- return test_and_set_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
-{
- return test_and_clear_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
-{
- return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
-}
-
-static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
-{
- return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
-}
-
-static inline void pi_set_sn(struct pi_desc *pi_desc)
-{
- set_bit(POSTED_INTR_SN,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_set_on(struct pi_desc *pi_desc)
-{
- set_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_clear_on(struct pi_desc *pi_desc)
-{
- clear_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline void pi_clear_sn(struct pi_desc *pi_desc)
-{
- clear_bit(POSTED_INTR_SN,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_on(struct pi_desc *pi_desc)
-{
- return test_bit(POSTED_INTR_ON,
- (unsigned long *)&pi_desc->control);
-}
-
-static inline int pi_test_sn(struct pi_desc *pi_desc)
-{
- return test_bit(POSTED_INTR_SN,
- (unsigned long *)&pi_desc->control);
-}
-
static inline u8 vmx_get_rvi(void)
{
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
@@ -499,11 +422,6 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_vmx, vcpu);
}
-static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
-{
- return &(to_vmx(vcpu)->pi_desc);
-}
-
static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -552,7 +470,23 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
{
- return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+ if (!enable_ept)
+ return true;
+
+ return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+}
+
+static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
+{
+ return enable_unrestricted_guest && (!is_guest_mode(vcpu) ||
+ (secondary_exec_controls_get(to_vmx(vcpu)) &
+ SECONDARY_EXEC_UNRESTRICTED_GUEST));
+}
+
+bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu);
+static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
+{
+ return is_unrestricted_guest(vcpu) || __vmx_guest_state_valid(vcpu);
}
void dump_vmcs(void);
diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 692b0c31c9c8..692b0c31c9c8 100644
--- a/arch/x86/kvm/vmx/ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1994602a0851..f5ede41bf9e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -71,6 +71,7 @@
#include <asm/irq_remapping.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
+#include <asm/tlbflush.h>
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
#include <clocksource/hyperv_timer.h>
@@ -161,24 +162,29 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
-#define KVM_NR_SHARED_MSRS 16
+/*
+ * Restoring the host value for MSRs that are only consumed when running in
+ * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
+ * returns to userspace, i.e. the kernel can run with the guest's value.
+ */
+#define KVM_MAX_NR_USER_RETURN_MSRS 16
-struct kvm_shared_msrs_global {
+struct kvm_user_return_msrs_global {
int nr;
- u32 msrs[KVM_NR_SHARED_MSRS];
+ u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
};
-struct kvm_shared_msrs {
+struct kvm_user_return_msrs {
struct user_return_notifier urn;
bool registered;
- struct kvm_shared_msr_values {
+ struct kvm_user_return_msr_values {
u64 host;
u64 curr;
- } values[KVM_NR_SHARED_MSRS];
+ } values[KVM_MAX_NR_USER_RETURN_MSRS];
};
-static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
-static struct kvm_shared_msrs __percpu *shared_msrs;
+static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
+static struct kvm_user_return_msrs __percpu *user_return_msrs;
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
@@ -188,7 +194,7 @@ static struct kvm_shared_msrs __percpu *shared_msrs;
u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);
-bool __read_mostly allow_smaller_maxphyaddr;
+bool __read_mostly allow_smaller_maxphyaddr = 0;
EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
static u64 __read_mostly host_xss;
@@ -259,14 +265,14 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
if (ignore_msrs) {
if (report_ignored_msrs)
- vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n",
- op, msr, data);
+ kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
+ op, msr, data);
/* Mask the error */
return 0;
} else {
- vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n",
- op, msr, data);
- return 1;
+ kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
+ op, msr, data);
+ return -ENOENT;
}
}
@@ -293,9 +299,9 @@ static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
static void kvm_on_user_return(struct user_return_notifier *urn)
{
unsigned slot;
- struct kvm_shared_msrs *locals
- = container_of(urn, struct kvm_shared_msrs, urn);
- struct kvm_shared_msr_values *values;
+ struct kvm_user_return_msrs *msrs
+ = container_of(urn, struct kvm_user_return_msrs, urn);
+ struct kvm_user_return_msr_values *values;
unsigned long flags;
/*
@@ -303,73 +309,73 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
* interrupted and executed through kvm_arch_hardware_disable()
*/
local_irq_save(flags);
- if (locals->registered) {
- locals->registered = false;
+ if (msrs->registered) {
+ msrs->registered = false;
user_return_notifier_unregister(urn);
}
local_irq_restore(flags);
- for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
- values = &locals->values[slot];
+ for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+ values = &msrs->values[slot];
if (values->host != values->curr) {
- wrmsrl(shared_msrs_global.msrs[slot], values->host);
+ wrmsrl(user_return_msrs_global.msrs[slot], values->host);
values->curr = values->host;
}
}
}
-void kvm_define_shared_msr(unsigned slot, u32 msr)
+void kvm_define_user_return_msr(unsigned slot, u32 msr)
{
- BUG_ON(slot >= KVM_NR_SHARED_MSRS);
- shared_msrs_global.msrs[slot] = msr;
- if (slot >= shared_msrs_global.nr)
- shared_msrs_global.nr = slot + 1;
+ BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
+ user_return_msrs_global.msrs[slot] = msr;
+ if (slot >= user_return_msrs_global.nr)
+ user_return_msrs_global.nr = slot + 1;
}
-EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
+EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
-static void kvm_shared_msr_cpu_online(void)
+static void kvm_user_return_msr_cpu_online(void)
{
unsigned int cpu = smp_processor_id();
- struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+ struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
u64 value;
int i;
- for (i = 0; i < shared_msrs_global.nr; ++i) {
- rdmsrl_safe(shared_msrs_global.msrs[i], &value);
- smsr->values[i].host = value;
- smsr->values[i].curr = value;
+ for (i = 0; i < user_return_msrs_global.nr; ++i) {
+ rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
+ msrs->values[i].host = value;
+ msrs->values[i].curr = value;
}
}
-int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
{
unsigned int cpu = smp_processor_id();
- struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+ struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
int err;
- value = (value & mask) | (smsr->values[slot].host & ~mask);
- if (value == smsr->values[slot].curr)
+ value = (value & mask) | (msrs->values[slot].host & ~mask);
+ if (value == msrs->values[slot].curr)
return 0;
- err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+ err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
if (err)
return 1;
- smsr->values[slot].curr = value;
- if (!smsr->registered) {
- smsr->urn.on_user_return = kvm_on_user_return;
- user_return_notifier_register(&smsr->urn);
- smsr->registered = true;
+ msrs->values[slot].curr = value;
+ if (!msrs->registered) {
+ msrs->urn.on_user_return = kvm_on_user_return;
+ user_return_notifier_register(&msrs->urn);
+ msrs->registered = true;
}
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
+EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
static void drop_user_return_notifiers(void)
{
unsigned int cpu = smp_processor_id();
- struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+ struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
- if (smsr->registered)
- kvm_on_user_return(&smsr->urn);
+ if (msrs->registered)
+ kvm_on_user_return(&msrs->urn);
}
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
@@ -976,6 +982,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
X86_CR4_SMEP;
+ unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
if (kvm_valid_cr4(vcpu, cr4))
return 1;
@@ -1003,7 +1010,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (kvm_x86_ops.set_cr4(vcpu, cr4))
return 1;
- if (((cr4 ^ old_cr4) & pdptr_bits) ||
+ if (((cr4 ^ old_cr4) & mmu_role_bits) ||
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
@@ -1451,6 +1458,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u64 old_efer = vcpu->arch.efer;
u64 efer = msr_info->data;
+ int r;
if (efer & efer_reserved_bits)
return 1;
@@ -1467,7 +1475,11 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
efer &= ~EFER_LMA;
efer |= vcpu->arch.efer & EFER_LMA;
- kvm_x86_ops.set_efer(vcpu, efer);
+ r = kvm_x86_ops.set_efer(vcpu, efer);
+ if (r) {
+ WARN_ON(r > 0);
+ return r;
+ }
/* Update reserved bits */
if ((efer ^ old_efer) & EFER_NX)
@@ -1482,6 +1494,40 @@ void kvm_enable_efer_bits(u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
+ u32 count = kvm->arch.msr_filter.count;
+ u32 i;
+ bool r = kvm->arch.msr_filter.default_allow;
+ int idx;
+
+ /* MSR filtering not set up or x2APIC enabled, allow everything */
+ if (!count || (index >= 0x800 && index <= 0x8ff))
+ return true;
+
+ /* Prevent collision with set_msr_filter */
+ idx = srcu_read_lock(&kvm->srcu);
+
+ for (i = 0; i < count; i++) {
+ u32 start = ranges[i].base;
+ u32 end = start + ranges[i].nmsrs;
+ u32 flags = ranges[i].flags;
+ unsigned long *bitmap = ranges[i].bitmap;
+
+ if ((index >= start) && (index < end) && (flags & type)) {
+ r = !!test_bit(index - start, bitmap);
+ break;
+ }
+ }
+
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(kvm_msr_allowed);
+
/*
* Write @data into the MSR specified by @index. Select MSR specific fault
* checks are bypassed if @host_initiated is %true.
@@ -1493,6 +1539,9 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
{
struct msr_data msr;
+ if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
+ return -EPERM;
+
switch (index) {
case MSR_FS_BASE:
case MSR_GS_BASE:
@@ -1549,6 +1598,9 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
struct msr_data msr;
int ret;
+ if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
+ return -EPERM;
+
msr.index = index;
msr.host_initiated = host_initiated;
@@ -1584,12 +1636,91 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
}
EXPORT_SYMBOL_GPL(kvm_set_msr);
+static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read)
+{
+ if (vcpu->run->msr.error) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ } else if (is_read) {
+ kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
+ kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
+ }
+
+ return kvm_skip_emulated_instruction(vcpu);
+}
+
+static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
+{
+ return complete_emulated_msr(vcpu, true);
+}
+
+static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
+{
+ return complete_emulated_msr(vcpu, false);
+}
+
+static u64 kvm_msr_reason(int r)
+{
+ switch (r) {
+ case -ENOENT:
+ return KVM_MSR_EXIT_REASON_UNKNOWN;
+ case -EPERM:
+ return KVM_MSR_EXIT_REASON_FILTER;
+ default:
+ return KVM_MSR_EXIT_REASON_INVAL;
+ }
+}
+
+static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
+ u32 exit_reason, u64 data,
+ int (*completion)(struct kvm_vcpu *vcpu),
+ int r)
+{
+ u64 msr_reason = kvm_msr_reason(r);
+
+ /* Check if the user wanted to know about this MSR fault */
+ if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
+ return 0;
+
+ vcpu->run->exit_reason = exit_reason;
+ vcpu->run->msr.error = 0;
+ memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
+ vcpu->run->msr.reason = msr_reason;
+ vcpu->run->msr.index = index;
+ vcpu->run->msr.data = data;
+ vcpu->arch.complete_userspace_io = completion;
+
+ return 1;
+}
+
+static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
+{
+ return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
+ complete_emulated_rdmsr, r);
+}
+
+static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
+{
+ return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
+ complete_emulated_wrmsr, r);
+}
+
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = kvm_rcx_read(vcpu);
u64 data;
+ int r;
+
+ r = kvm_get_msr(vcpu, ecx, &data);
- if (kvm_get_msr(vcpu, ecx, &data)) {
+ /* MSR read failed? See if we should ask user space */
+ if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
+ /* Bounce to user space */
+ return 0;
+ }
+
+ /* MSR read failed? Inject a #GP */
+ if (r) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(vcpu, 0);
return 1;
@@ -1607,8 +1738,21 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = kvm_rcx_read(vcpu);
u64 data = kvm_read_edx_eax(vcpu);
+ int r;
- if (kvm_set_msr(vcpu, ecx, data)) {
+ r = kvm_set_msr(vcpu, ecx, data);
+
+ /* MSR write failed? See if we should ask user space */
+ if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
+ /* Bounce to user space */
+ return 0;
+
+ /* Signal all other negative errors to userspace */
+ if (r < 0)
+ return r;
+
+ /* MSR write failed? Inject a #GP */
+ if (r > 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
@@ -1774,12 +1918,6 @@ static s64 get_kvmclock_base_ns(void)
}
#endif
-void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
-{
- kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
- kvm_vcpu_kick(vcpu);
-}
-
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
int version;
@@ -1787,6 +1925,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
struct pvclock_wall_clock wc;
u64 wall_nsec;
+ kvm->arch.wall_clock = wall_clock;
+
if (!wall_clock)
return;
@@ -1819,6 +1959,34 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}
+static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
+ bool old_msr, bool host_initiated)
+{
+ struct kvm_arch *ka = &vcpu->kvm->arch;
+
+ if (vcpu->vcpu_id == 0 && !host_initiated) {
+ if (ka->boot_vcpu_runs_old_kvmclock && old_msr)
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
+ ka->boot_vcpu_runs_old_kvmclock = old_msr;
+ }
+
+ vcpu->arch.time = system_time;
+ kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
+
+ /* we verify if the enable bit is set... */
+ vcpu->arch.pv_time_enabled = false;
+ if (!(system_time & 1))
+ return;
+
+ if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.pv_time, system_time & ~1ULL,
+ sizeof(struct pvclock_vcpu_time_info)))
+ vcpu->arch.pv_time_enabled = true;
+
+ return;
+}
+
static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
do_shl32_div32(dividend, divisor);
@@ -1978,12 +2146,6 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
#endif
}
-static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
-{
- u64 curr_offset = vcpu->arch.l1_tsc_offset;
- vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
-}
-
/*
* Multiply tsc by a fixed point number represented by ratio.
*
@@ -2045,14 +2207,13 @@ static inline bool kvm_check_tsc_unstable(void)
return check_tsc_unstable();
}
-void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
+static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
{
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
bool matched;
bool already_matched;
- u64 data = msr->data;
bool synchronizing = false;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
@@ -2061,7 +2222,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
- if (data == 0 && msr->host_initiated) {
+ if (data == 0) {
/*
* detection of vcpu initialization -- need to sync
* with other vCPUs. This particularly helps to keep
@@ -2131,9 +2292,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
- if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
- update_ia32_tsc_adjust_msr(vcpu, offset);
-
kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
@@ -2148,8 +2306,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}
-EXPORT_SYMBOL_GPL(kvm_write_tsc);
-
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
s64 adjustment)
{
@@ -2695,24 +2851,19 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
u32 page_num = data & ~PAGE_MASK;
u64 page_addr = data & PAGE_MASK;
u8 *page;
- int r;
- r = -E2BIG;
if (page_num >= blob_size)
- goto out;
- r = -ENOMEM;
+ return 1;
+
page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
- if (IS_ERR(page)) {
- r = PTR_ERR(page);
- goto out;
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
+ kfree(page);
+ return 1;
}
- if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
- goto out_free;
- r = 0;
-out_free:
- kfree(page);
-out:
- return r;
+ return 0;
}
static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
@@ -2730,6 +2881,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
if (data & 0x30)
return 1;
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
+ (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
+ return 1;
+
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
+ (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
+ return 1;
+
if (!lapic_in_kernel(vcpu))
return data ? 1 : 0;
@@ -2807,10 +2966,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
- trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
- st->preempted & KVM_VCPU_FLUSH_TLB);
- if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
- kvm_vcpu_flush_tlb_guest(vcpu);
+ if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+ trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
+ st->preempted & KVM_VCPU_FLUSH_TLB);
+ if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ kvm_vcpu_flush_tlb_guest(vcpu);
+ }
vcpu->arch.st.preempted = 0;
@@ -2944,7 +3105,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.msr_ia32_power_ctl = data;
break;
case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, msr_info);
+ if (msr_info->host_initiated) {
+ kvm_synchronize_tsc(vcpu, data);
+ } else {
+ u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
+ adjust_tsc_offset_guest(vcpu, adj);
+ vcpu->arch.ia32_tsc_adjust_msr += adj;
+ }
break;
case MSR_IA32_XSS:
if (!msr_info->host_initiated &&
@@ -2965,53 +3132,54 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.smi_count = data;
break;
case MSR_KVM_WALL_CLOCK_NEW:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+ return 1;
+
+ kvm_write_wall_clock(vcpu->kvm, data);
+ break;
case MSR_KVM_WALL_CLOCK:
- vcpu->kvm->arch.wall_clock = data;
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+ return 1;
+
kvm_write_wall_clock(vcpu->kvm, data);
break;
case MSR_KVM_SYSTEM_TIME_NEW:
- case MSR_KVM_SYSTEM_TIME: {
- struct kvm_arch *ka = &vcpu->kvm->arch;
-
- if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
- bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
-
- if (ka->boot_vcpu_runs_old_kvmclock != tmp)
- kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
-
- ka->boot_vcpu_runs_old_kvmclock = tmp;
- }
-
- vcpu->arch.time = data;
- kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
-
- /* we verify if the enable bit is set... */
- vcpu->arch.pv_time_enabled = false;
- if (!(data & 1))
- break;
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+ return 1;
- if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
- &vcpu->arch.pv_time, data & ~1ULL,
- sizeof(struct pvclock_vcpu_time_info)))
- vcpu->arch.pv_time_enabled = true;
+ kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
+ break;
+ case MSR_KVM_SYSTEM_TIME:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+ return 1;
+ kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
break;
- }
case MSR_KVM_ASYNC_PF_EN:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ return 1;
+
if (kvm_pv_enable_async_pf(vcpu, data))
return 1;
break;
case MSR_KVM_ASYNC_PF_INT:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+ return 1;
+
if (kvm_pv_enable_async_pf_int(vcpu, data))
return 1;
break;
case MSR_KVM_ASYNC_PF_ACK:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ return 1;
if (data & 0x1) {
vcpu->arch.apf.pageready_pending = false;
kvm_check_async_pf_completion(vcpu);
}
break;
case MSR_KVM_STEAL_TIME:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+ return 1;
if (unlikely(!sched_info_on()))
return 1;
@@ -3028,11 +3196,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_PV_EOI_EN:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+ return 1;
+
if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;
case MSR_KVM_POLL_CONTROL:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+ return 1;
+
/* only enable bit supported */
if (data & (-1ULL << 1))
return 1;
@@ -3221,9 +3395,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_POWER_CTL:
msr_info->data = vcpu->arch.msr_ia32_power_ctl;
break;
- case MSR_IA32_TSC:
- msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
+ case MSR_IA32_TSC: {
+ /*
+ * Intel SDM states that MSR_IA32_TSC read adds the TSC offset
+ * even when not intercepted. AMD manual doesn't explicitly
+ * state this but appears to behave the same.
+ *
+ * On userspace reads and writes, however, we unconditionally
+ * return L1's TSC value to ensure backwards-compatible
+ * behavior for migration.
+ */
+ u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
+ vcpu->arch.tsc_offset;
+
+ msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
break;
+ }
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -3513,6 +3700,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_EXCEPTION_PAYLOAD:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_LAST_CPU:
+ case KVM_CAP_X86_USER_SPACE_MSR:
+ case KVM_CAP_X86_MSR_FILTER:
+ case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -4383,6 +4573,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return kvm_x86_ops.enable_direct_tlbflush(vcpu);
+ case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+ vcpu->arch.pv_cpuid.enforce = cap->args[0];
+
+ return 0;
+
default:
return -EINVAL;
}
@@ -5033,6 +5228,10 @@ split_irqchip_unlock:
kvm->arch.exception_payload_enabled = cap->args[0];
r = 0;
break;
+ case KVM_CAP_X86_USER_SPACE_MSR:
+ kvm->arch.user_space_msr_mask = cap->args[0];
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -5040,6 +5239,110 @@ split_irqchip_unlock:
return r;
}
+static void kvm_clear_msr_filter(struct kvm *kvm)
+{
+ u32 i;
+ u32 count = kvm->arch.msr_filter.count;
+ struct msr_bitmap_range ranges[16];
+
+ mutex_lock(&kvm->lock);
+ kvm->arch.msr_filter.count = 0;
+ memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
+ mutex_unlock(&kvm->lock);
+ synchronize_srcu(&kvm->srcu);
+
+ for (i = 0; i < count; i++)
+ kfree(ranges[i].bitmap);
+}
+
+static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
+{
+ struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
+ struct msr_bitmap_range range;
+ unsigned long *bitmap = NULL;
+ size_t bitmap_size;
+ int r;
+
+ if (!user_range->nmsrs)
+ return 0;
+
+ bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
+ if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
+ return -EINVAL;
+
+ bitmap = memdup_user((__user u8*)user_range->bitmap, bitmap_size);
+ if (IS_ERR(bitmap))
+ return PTR_ERR(bitmap);
+
+ range = (struct msr_bitmap_range) {
+ .flags = user_range->flags,
+ .base = user_range->base,
+ .nmsrs = user_range->nmsrs,
+ .bitmap = bitmap,
+ };
+
+ if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
+ r = -EINVAL;
+ goto err;
+ }
+
+ if (!range.flags) {
+ r = -EINVAL;
+ goto err;
+ }
+
+ /* Everything ok, add this range identifier to our global pool */
+ ranges[kvm->arch.msr_filter.count] = range;
+ /* Make sure we filled the array before we tell anyone to walk it */
+ smp_wmb();
+ kvm->arch.msr_filter.count++;
+
+ return 0;
+err:
+ kfree(bitmap);
+ return r;
+}
+
+static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_msr_filter filter;
+ bool default_allow;
+ int r = 0;
+ bool empty = true;
+ u32 i;
+
+ if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
+ return -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
+ empty &= !filter.ranges[i].nmsrs;
+
+ default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
+ if (empty && !default_allow)
+ return -EINVAL;
+
+ kvm_clear_msr_filter(kvm);
+
+ kvm->arch.msr_filter.default_allow = default_allow;
+
+ /*
+ * Protect from concurrent calls to this function that could trigger
+ * a TOCTOU violation on kvm->arch.msr_filter.count.
+ */
+ mutex_lock(&kvm->lock);
+ for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
+ r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
+ if (r)
+ break;
+ }
+
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
+ mutex_unlock(&kvm->lock);
+
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5346,6 +5649,9 @@ set_pit2_out:
case KVM_SET_PMU_EVENT_FILTER:
r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
break;
+ case KVM_X86_SET_MSR_FILTER:
+ r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
+ break;
default:
r = -ENOTTY;
}
@@ -5707,6 +6013,9 @@ int handle_ud(struct kvm_vcpu *vcpu)
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
+ if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, NULL, 0)))
+ return 1;
+
if (force_emulation_prefix &&
kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
sig, sizeof(sig), &e) == 0 &&
@@ -6362,13 +6671,33 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 *pdata)
{
- return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ int r;
+
+ r = kvm_get_msr(vcpu, msr_index, pdata);
+
+ if (r && kvm_get_msr_user_space(vcpu, msr_index, r)) {
+ /* Bounce to user space */
+ return X86EMUL_IO_NEEDED;
+ }
+
+ return r;
}
static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 data)
{
- return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ int r;
+
+ r = kvm_set_msr(vcpu, msr_index, data);
+
+ if (r && kvm_set_msr_user_space(vcpu, msr_index, data, r)) {
+ /* Bounce to user space */
+ return X86EMUL_IO_NEEDED;
+ }
+
+ return r;
}
static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
@@ -6912,7 +7241,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int r;
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
bool writeback = true;
- bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
+ bool write_fault_to_spt;
+
+ if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, insn, insn_len)))
+ return 1;
vcpu->arch.l1tf_flush_l1d = true;
@@ -6920,6 +7252,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* Clear write_fault_to_shadow_pgtable here to ensure it is
* never reused.
*/
+ write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
vcpu->arch.write_fault_to_shadow_pgtable = false;
kvm_clear_exception_queue(vcpu);
@@ -7514,9 +7847,9 @@ int kvm_arch_init(void *opaque)
goto out_free_x86_fpu_cache;
}
- shared_msrs = alloc_percpu(struct kvm_shared_msrs);
- if (!shared_msrs) {
- printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
+ user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
+ if (!user_return_msrs) {
+ printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
goto out_free_x86_emulator_cache;
}
@@ -7549,7 +7882,7 @@ int kvm_arch_init(void *opaque)
return 0;
out_free_percpu:
- free_percpu(shared_msrs);
+ free_percpu(user_return_msrs);
out_free_x86_emulator_cache:
kmem_cache_destroy(x86_emulator_cache);
out_free_x86_fpu_cache:
@@ -7576,7 +7909,7 @@ void kvm_arch_exit(void)
#endif
kvm_x86_ops.hardware_enable = NULL;
kvm_mmu_module_exit();
- free_percpu(shared_msrs);
+ free_percpu(user_return_msrs);
kmem_cache_destroy(x86_fpu_cache);
}
@@ -7717,11 +8050,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
goto out;
}
+ ret = -KVM_ENOSYS;
+
switch (nr) {
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
case KVM_HC_KICK_CPU:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
+ break;
+
kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
kvm_sched_yield(vcpu->kvm, a1);
ret = 0;
@@ -7732,9 +8070,15 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
break;
#endif
case KVM_HC_SEND_IPI:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
+ break;
+
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
case KVM_HC_SCHED_YIELD:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
+ break;
+
kvm_sched_yield(vcpu->kvm, a0);
ret = 0;
break;
@@ -8365,8 +8709,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
- if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
- if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
+ if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+ if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
r = 0;
goto out;
}
@@ -8473,6 +8817,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_vcpu_update_apicv(vcpu);
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
kvm_check_async_pf_completion(vcpu);
+ if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
+ kvm_x86_ops.msr_filter_changed(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -8548,7 +8894,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops.request_immediate_exit(vcpu);
}
- trace_kvm_entry(vcpu->vcpu_id);
+ trace_kvm_entry(vcpu);
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@ -9562,7 +9908,6 @@ fail_mmu_destroy:
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
- struct msr_data msr;
struct kvm *kvm = vcpu->kvm;
kvm_hv_vcpu_postcreate(vcpu);
@@ -9570,10 +9915,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
if (mutex_lock_killable(&vcpu->mutex))
return;
vcpu_load(vcpu);
- msr.data = 0x0;
- msr.index = MSR_IA32_TSC;
- msr.host_initiated = true;
- kvm_write_tsc(vcpu, &msr);
+ kvm_synchronize_tsc(vcpu, 0);
vcpu_put(vcpu);
/* poll control enabled by default */
@@ -9610,6 +9952,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_mmu_destroy(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
+ kvfree(vcpu->arch.cpuid_entries);
if (!lapic_in_kernel(vcpu))
static_key_slow_dec(&kvm_no_apic_vcpu);
}
@@ -9707,7 +10050,7 @@ int kvm_arch_hardware_enable(void)
u64 max_tsc = 0;
bool stable, backwards_tsc = false;
- kvm_shared_msr_cpu_online();
+ kvm_user_return_msr_cpu_online();
ret = kvm_x86_ops.hardware_enable();
if (ret != 0)
return ret;
@@ -10025,6 +10368,8 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ u32 i;
+
if (current->mm == kvm->mm) {
/*
* Free memory regions allocated on behalf of userspace,
@@ -10041,6 +10386,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
}
if (kvm_x86_ops.vm_destroy)
kvm_x86_ops.vm_destroy(kvm);
+ for (i = 0; i < kvm->arch.msr_filter.count; i++)
+ kfree(kvm->arch.msr_filter.ranges[i].bitmap);
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
@@ -10771,6 +11118,111 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c
}
EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
+/*
+ * Handles kvm_read/write_guest_virt*() result and either injects #PF or returns
+ * KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM. Return value
+ * indicates whether exit to userspace is needed.
+ */
+int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
+ struct x86_exception *e)
+{
+ if (r == X86EMUL_PROPAGATE_FAULT) {
+ kvm_inject_emulated_page_fault(vcpu, e);
+ return 1;
+ }
+
+ /*
+ * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED
+ * while handling a VMX instruction KVM could've handled the request
+ * correctly by exiting to userspace and performing I/O but there
+ * doesn't seem to be a real use-case behind such requests, just return
+ * KVM_EXIT_INTERNAL_ERROR for now.
+ */
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);
+
+int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
+{
+ bool pcid_enabled;
+ struct x86_exception e;
+ unsigned i;
+ unsigned long roots_to_free = 0;
+ struct {
+ u64 pcid;
+ u64 gla;
+ } operand;
+ int r;
+
+ r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
+ if (r != X86EMUL_CONTINUE)
+ return kvm_handle_memory_failure(vcpu, r, &e);
+
+ if (operand.pcid >> 12 != 0) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+
+ switch (type) {
+ case INVPCID_TYPE_INDIV_ADDR:
+ if ((!pcid_enabled && (operand.pcid != 0)) ||
+ is_noncanonical_address(operand.gla, vcpu)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
+ return kvm_skip_emulated_instruction(vcpu);
+
+ case INVPCID_TYPE_SINGLE_CTXT:
+ if (!pcid_enabled && (operand.pcid != 0)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ if (kvm_get_active_pcid(vcpu) == operand.pcid) {
+ kvm_mmu_sync_roots(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+ }
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
+ == operand.pcid)
+ roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+
+ kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
+ /*
+ * If neither the current cr3 nor any of the prev_roots use the
+ * given PCID, then nothing needs to be done here because a
+ * resync will happen anyway before switching to any other CR3.
+ */
+
+ return kvm_skip_emulated_instruction(vcpu);
+
+ case INVPCID_TYPE_ALL_NON_GLOBAL:
+ /*
+ * Currently, KVM doesn't mark global entries in the shadow
+ * page tables, so a non-global flush just degenerates to a
+ * global flush. If needed, we could optimize this later by
+ * keeping track of global entries in shadow page tables.
+ */
+
+ fallthrough;
+ case INVPCID_TYPE_ALL_INCL_GLOBAL:
+ kvm_mmu_unload(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+
+ default:
+ BUG(); /* We have already checked above that type <= 3 */
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 995ab696dcf0..3900ab0c6004 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -246,7 +246,6 @@ static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
return is_smm(vcpu) || kvm_x86_ops.apic_init_signal_blocked(vcpu);
}
-void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -372,6 +371,10 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
int kvm_spec_ctrl_test_value(u64 value);
int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu);
+int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
+ struct x86_exception *e);
+int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
+bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
#define KVM_MSR_RET_INVALID 2
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index aa067859a70b..bad4dee4f0e4 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
+lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index d1d768912368..4304320e51f4 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -253,28 +253,17 @@ EXPORT_SYMBOL(csum_partial)
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
- int len, int sum, int *src_err_ptr, int *dst_err_ptr)
+ int len)
*/
/*
* Copy from ds while checksumming, otherwise like csum_partial
- *
- * The macros SRC and DST specify the type of access for the instruction.
- * thus we can call a custom exception handler for all access types.
- *
- * FIXME: could someone double-check whether I haven't mixed up some SRC and
- * DST definitions? It's damn hard to trigger all cases. I hope I got
- * them all but there's no guarantee.
*/
-#define SRC(y...) \
+#define EXC(y...) \
9999: y; \
_ASM_EXTABLE_UA(9999b, 6001f)
-#define DST(y...) \
- 9999: y; \
- _ASM_EXTABLE_UA(9999b, 6002f)
-
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
#define ARGBASE 16
@@ -285,20 +274,20 @@ SYM_FUNC_START(csum_partial_copy_generic)
pushl %edi
pushl %esi
pushl %ebx
- movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
movl ARGBASE+8(%esp),%edi # dst
+ movl $-1, %eax # sum
testl $2, %edi # Check alignment.
jz 2f # Jump if alignment is ok.
subl $2, %ecx # Alignment uses up two bytes.
jae 1f # Jump if we had at least two bytes.
addl $2, %ecx # ecx was < 2. Deal with it.
jmp 4f
-SRC(1: movw (%esi), %bx )
+EXC(1: movw (%esi), %bx )
addl $2, %esi
-DST( movw %bx, (%edi) )
+EXC( movw %bx, (%edi) )
addl $2, %edi
addw %bx, %ax
adcl $0, %eax
@@ -306,34 +295,34 @@ DST( movw %bx, (%edi) )
movl %ecx, FP(%esp)
shrl $5, %ecx
jz 2f
- testl %esi, %esi
-SRC(1: movl (%esi), %ebx )
-SRC( movl 4(%esi), %edx )
+ testl %esi, %esi # what's wrong with clc?
+EXC(1: movl (%esi), %ebx )
+EXC( movl 4(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, (%edi) )
+EXC( movl %ebx, (%edi) )
adcl %edx, %eax
-DST( movl %edx, 4(%edi) )
+EXC( movl %edx, 4(%edi) )
-SRC( movl 8(%esi), %ebx )
-SRC( movl 12(%esi), %edx )
+EXC( movl 8(%esi), %ebx )
+EXC( movl 12(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, 8(%edi) )
+EXC( movl %ebx, 8(%edi) )
adcl %edx, %eax
-DST( movl %edx, 12(%edi) )
+EXC( movl %edx, 12(%edi) )
-SRC( movl 16(%esi), %ebx )
-SRC( movl 20(%esi), %edx )
+EXC( movl 16(%esi), %ebx )
+EXC( movl 20(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, 16(%edi) )
+EXC( movl %ebx, 16(%edi) )
adcl %edx, %eax
-DST( movl %edx, 20(%edi) )
+EXC( movl %edx, 20(%edi) )
-SRC( movl 24(%esi), %ebx )
-SRC( movl 28(%esi), %edx )
+EXC( movl 24(%esi), %ebx )
+EXC( movl 28(%esi), %edx )
adcl %ebx, %eax
-DST( movl %ebx, 24(%edi) )
+EXC( movl %ebx, 24(%edi) )
adcl %edx, %eax
-DST( movl %edx, 28(%edi) )
+EXC( movl %edx, 28(%edi) )
lea 32(%esi), %esi
lea 32(%edi), %edi
@@ -345,9 +334,9 @@ DST( movl %edx, 28(%edi) )
andl $0x1c, %edx
je 4f
shrl $2, %edx # This clears CF
-SRC(3: movl (%esi), %ebx )
+EXC(3: movl (%esi), %ebx )
adcl %ebx, %eax
-DST( movl %ebx, (%edi) )
+EXC( movl %ebx, (%edi) )
lea 4(%esi), %esi
lea 4(%edi), %edi
dec %edx
@@ -357,39 +346,24 @@ DST( movl %ebx, (%edi) )
jz 7f
cmpl $2, %ecx
jb 5f
-SRC( movw (%esi), %cx )
+EXC( movw (%esi), %cx )
leal 2(%esi), %esi
-DST( movw %cx, (%edi) )
+EXC( movw %cx, (%edi) )
leal 2(%edi), %edi
je 6f
shll $16,%ecx
-SRC(5: movb (%esi), %cl )
-DST( movb %cl, (%edi) )
+EXC(5: movb (%esi), %cl )
+EXC( movb %cl, (%edi) )
6: addl %ecx, %eax
adcl $0, %eax
7:
-5000:
# Exception handler:
.section .fixup, "ax"
6001:
- movl ARGBASE+20(%esp), %ebx # src_err_ptr
- movl $-EFAULT, (%ebx)
-
- # zero the complete destination - computing the rest
- # is too much work
- movl ARGBASE+8(%esp), %edi # dst
- movl ARGBASE+12(%esp), %ecx # len
- xorl %eax,%eax
- rep ; stosb
-
- jmp 5000b
-
-6002:
- movl ARGBASE+24(%esp), %ebx # dst_err_ptr
- movl $-EFAULT,(%ebx)
- jmp 5000b
+ xorl %eax, %eax
+ jmp 7b
.previous
@@ -405,14 +379,14 @@ SYM_FUNC_END(csum_partial_copy_generic)
/* Version for PentiumII/PPro */
#define ROUND1(x) \
- SRC(movl x(%esi), %ebx ) ; \
+ EXC(movl x(%esi), %ebx ) ; \
addl %ebx, %eax ; \
- DST(movl %ebx, x(%edi) ) ;
+ EXC(movl %ebx, x(%edi) ) ;
#define ROUND(x) \
- SRC(movl x(%esi), %ebx ) ; \
+ EXC(movl x(%esi), %ebx ) ; \
adcl %ebx, %eax ; \
- DST(movl %ebx, x(%edi) ) ;
+ EXC(movl %ebx, x(%edi) ) ;
#define ARGBASE 12
@@ -423,7 +397,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
- movl ARGBASE+16(%esp),%eax #sum
+ movl $-1, %eax #sum
# movl %ecx, %edx
movl %ecx, %ebx
movl %esi, %edx
@@ -439,7 +413,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
JMP_NOSPEC ebx
1: addl $64,%esi
addl $64,%edi
- SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
+ EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl)
ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
@@ -453,29 +427,20 @@ SYM_FUNC_START(csum_partial_copy_generic)
jz 7f
cmpl $2, %edx
jb 5f
-SRC( movw (%esi), %dx )
+EXC( movw (%esi), %dx )
leal 2(%esi), %esi
-DST( movw %dx, (%edi) )
+EXC( movw %dx, (%edi) )
leal 2(%edi), %edi
je 6f
shll $16,%edx
5:
-SRC( movb (%esi), %dl )
-DST( movb %dl, (%edi) )
+EXC( movb (%esi), %dl )
+EXC( movb %dl, (%edi) )
6: addl %edx, %eax
adcl $0, %eax
7:
.section .fixup, "ax"
-6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr
- movl $-EFAULT, (%ebx)
- # zero the complete destination (computing the rest is too much work)
- movl ARGBASE+8(%esp),%edi # dst
- movl ARGBASE+12(%esp),%ecx # len
- xorl %eax,%eax
- rep; stosb
- jmp 7b
-6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr
- movl $-EFAULT, (%ebx)
+6001: xorl %eax, %eax
jmp 7b
.previous
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
new file mode 100644
index 000000000000..c13e8c9ee926
--- /dev/null
+++ b/arch/x86/lib/copy_mc.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/mce.h>
+
+#ifdef CONFIG_X86_MCE
+/*
+ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
+ * implementation.
+ */
+static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
+
+void enable_copy_mc_fragile(void)
+{
+ static_branch_inc(&copy_mc_fragile_key);
+}
+#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
+
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point, or
+ * source exception point.
+ */
+__visible notrace unsigned long
+copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
+{
+ for (; len; --len, to++, from++)
+ if (copy_mc_fragile(to, from, 1))
+ break;
+ return len;
+}
+#else
+/*
+ * No point in doing careful copying, or consulting a static key when
+ * there is no #MC handler in the CONFIG_X86_MCE=n case.
+ */
+void enable_copy_mc_fragile(void)
+{
+}
+#define copy_mc_fragile_enabled (0)
+#endif
+
+unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
+
+/**
+ * copy_mc_to_kernel - memory copy that handles source exceptions
+ *
+ * @dst: destination address
+ * @src: source address
+ * @len: number of bytes to copy
+ *
+ * Call into the 'fragile' version on systems that benefit from avoiding
+ * corner case poison consumption scenarios, For example, accessing
+ * poison across 2 cachelines with a single instruction. Almost all
+ * other uses case can use copy_mc_enhanced_fast_string() for a fast
+ * recoverable copy, or fallback to plain memcpy.
+ *
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
+ */
+unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
+{
+ if (copy_mc_fragile_enabled)
+ return copy_mc_fragile(dst, src, len);
+ if (static_cpu_has(X86_FEATURE_ERMS))
+ return copy_mc_enhanced_fast_string(dst, src, len);
+ memcpy(dst, src, len);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
+
+unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
+{
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ __uaccess_begin();
+ ret = copy_mc_fragile(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ if (static_cpu_has(X86_FEATURE_ERMS)) {
+ __uaccess_begin();
+ ret = copy_mc_enhanced_fast_string(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ return copy_user_generic(dst, src, len);
+}
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
new file mode 100644
index 000000000000..892d8915f609
--- /dev/null
+++ b/arch/x86/lib/copy_mc_64.S
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/linkage.h>
+#include <asm/copy_mc_test.h>
+#include <asm/export.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_UML
+
+#ifdef CONFIG_X86_MCE
+COPY_MC_TEST_CTL
+
+/*
+ * copy_mc_fragile - copy memory with indication if an exception / fault happened
+ *
+ * The 'fragile' version is opted into by platform quirks and takes
+ * pains to avoid unrecoverable corner cases like 'fast-string'
+ * instruction sequences, and consuming poison across a cacheline
+ * boundary. The non-fragile version is equivalent to memcpy()
+ * regardless of CPU machine-check-recovery capability.
+ */
+SYM_FUNC_START(copy_mc_fragile)
+ cmpl $8, %edx
+ /* Less than 8 bytes? Go to byte copy loop */
+ jb .L_no_whole_words
+
+ /* Check for bad alignment of source */
+ testl $7, %esi
+ /* Already aligned */
+ jz .L_8byte_aligned
+
+ /* Copy one byte at a time until source is 8-byte aligned */
+ movl %esi, %ecx
+ andl $7, %ecx
+ subl $8, %ecx
+ negl %ecx
+ subl %ecx, %edx
+.L_read_leading_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_leading_bytes
+
+.L_8byte_aligned:
+ movl %edx, %ecx
+ andl $7, %edx
+ shrl $3, %ecx
+ jz .L_no_whole_words
+
+.L_read_words:
+ movq (%rsi), %r8
+ COPY_MC_TEST_SRC %rsi 8 .E_read_words
+ COPY_MC_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+ movq %r8, (%rdi)
+ addq $8, %rsi
+ addq $8, %rdi
+ decl %ecx
+ jnz .L_read_words
+
+ /* Any trailing bytes? */
+.L_no_whole_words:
+ andl %edx, %edx
+ jz .L_done_memcpy_trap
+
+ /* Copy trailing bytes */
+ movl %edx, %ecx
+.L_read_trailing_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_trailing_bytes
+
+ /* Copy successful. Return zero */
+.L_done_memcpy_trap:
+ xorl %eax, %eax
+.L_done:
+ ret
+SYM_FUNC_END(copy_mc_fragile)
+EXPORT_SYMBOL_GPL(copy_mc_fragile)
+
+ .section .fixup, "ax"
+ /*
+ * Return number of bytes not copied for any failure. Note that
+ * there is no "tail" handling since the source buffer is 8-byte
+ * aligned and poison is cacheline aligned.
+ */
+.E_read_words:
+ shll $3, %ecx
+.E_leading_bytes:
+ addl %edx, %ecx
+.E_trailing_bytes:
+ mov %ecx, %eax
+ jmp .L_done
+
+ /*
+ * For write fault handling, given the destination is unaligned,
+ * we handle faults on multi-byte writes with a byte-by-byte
+ * copy up to the write-protected page.
+ */
+.E_write_words:
+ shll $3, %ecx
+ addl %edx, %ecx
+ movl %ecx, %edx
+ jmp copy_mc_fragile_handle_tail
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE(.L_write_words, .E_write_words)
+ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+#endif /* CONFIG_X86_MCE */
+
+/*
+ * copy_mc_enhanced_fast_string - memory copy with exception handling
+ *
+ * Fast string copy + fault / exception handling. If the CPU does
+ * support machine check exception recovery, but does not support
+ * recovering from fast-string exceptions then this CPU needs to be
+ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
+ * machine check recovery support this version should be no slower than
+ * standard memcpy.
+ */
+SYM_FUNC_START(copy_mc_enhanced_fast_string)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+.L_copy:
+ rep movsb
+ /* Copy successful. Return zero */
+ xorl %eax, %eax
+ ret
+SYM_FUNC_END(copy_mc_enhanced_fast_string)
+
+ .section .fixup, "ax"
+.E_copy:
+ /*
+ * On fault %rcx is updated such that the copy instruction could
+ * optionally be restarted at the fault position, i.e. it
+ * contains 'bytes remaining'. A non-zero return indicates error
+ * to copy_mc_generic() users, or indicate short transfers to
+ * user-copy routines.
+ */
+ movq %rcx, %rax
+ ret
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_copy, .E_copy)
+#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 816f128a6d52..77b9b2a3b5c8 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,6 +15,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
@@ -36,8 +37,8 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
+ _ASM_EXTABLE_CPY(100b, 103b)
+ _ASM_EXTABLE_CPY(101b, 103b)
.endm
/*
@@ -116,26 +117,26 @@ SYM_FUNC_START(copy_user_generic_unrolled)
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
- _ASM_EXTABLE_UA(1b, 30b)
- _ASM_EXTABLE_UA(2b, 30b)
- _ASM_EXTABLE_UA(3b, 30b)
- _ASM_EXTABLE_UA(4b, 30b)
- _ASM_EXTABLE_UA(5b, 30b)
- _ASM_EXTABLE_UA(6b, 30b)
- _ASM_EXTABLE_UA(7b, 30b)
- _ASM_EXTABLE_UA(8b, 30b)
- _ASM_EXTABLE_UA(9b, 30b)
- _ASM_EXTABLE_UA(10b, 30b)
- _ASM_EXTABLE_UA(11b, 30b)
- _ASM_EXTABLE_UA(12b, 30b)
- _ASM_EXTABLE_UA(13b, 30b)
- _ASM_EXTABLE_UA(14b, 30b)
- _ASM_EXTABLE_UA(15b, 30b)
- _ASM_EXTABLE_UA(16b, 30b)
- _ASM_EXTABLE_UA(18b, 40b)
- _ASM_EXTABLE_UA(19b, 40b)
- _ASM_EXTABLE_UA(21b, 50b)
- _ASM_EXTABLE_UA(22b, 50b)
+ _ASM_EXTABLE_CPY(1b, 30b)
+ _ASM_EXTABLE_CPY(2b, 30b)
+ _ASM_EXTABLE_CPY(3b, 30b)
+ _ASM_EXTABLE_CPY(4b, 30b)
+ _ASM_EXTABLE_CPY(5b, 30b)
+ _ASM_EXTABLE_CPY(6b, 30b)
+ _ASM_EXTABLE_CPY(7b, 30b)
+ _ASM_EXTABLE_CPY(8b, 30b)
+ _ASM_EXTABLE_CPY(9b, 30b)
+ _ASM_EXTABLE_CPY(10b, 30b)
+ _ASM_EXTABLE_CPY(11b, 30b)
+ _ASM_EXTABLE_CPY(12b, 30b)
+ _ASM_EXTABLE_CPY(13b, 30b)
+ _ASM_EXTABLE_CPY(14b, 30b)
+ _ASM_EXTABLE_CPY(15b, 30b)
+ _ASM_EXTABLE_CPY(16b, 30b)
+ _ASM_EXTABLE_CPY(18b, 40b)
+ _ASM_EXTABLE_CPY(19b, 40b)
+ _ASM_EXTABLE_CPY(21b, 50b)
+ _ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -180,8 +181,8 @@ SYM_FUNC_START(copy_user_generic_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 11b)
- _ASM_EXTABLE_UA(3b, 12b)
+ _ASM_EXTABLE_CPY(1b, 11b)
+ _ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
@@ -213,7 +214,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 12b)
+ _ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
@@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
+ * Don't try to copy the tail if machine check happened
*
* Input:
* rdi destination
@@ -232,12 +234,25 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
+ cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
+ je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
- _ASM_EXTABLE_UA(1b, 2b)
+ /*
+ * Return zero to pretend that this copy succeeded. This
+ * is counter-intuitive, but needed to prevent the code
+ * in lib/iov_iter.c from retrying and running back into
+ * the poison cache line again. The machine check handler
+ * will ensure that a SIGBUS is sent to the task.
+ */
+3: xorl %eax,%eax
+ ASM_CLAC
+ ret
+
+ _ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
@@ -366,27 +381,27 @@ SYM_FUNC_START(__copy_user_nocache)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
- _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 3394a8ff7fd0..1fbd8ee9642d 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -18,9 +18,6 @@
* rdi source
* rsi destination
* edx len (32bit)
- * ecx sum (32bit)
- * r8 src_err_ptr (int)
- * r9 dst_err_ptr (int)
*
* Output
* eax 64bit sum. undefined in case of exception.
@@ -31,44 +28,32 @@
.macro source
10:
- _ASM_EXTABLE_UA(10b, .Lbad_source)
+ _ASM_EXTABLE_UA(10b, .Lfault)
.endm
.macro dest
20:
- _ASM_EXTABLE_UA(20b, .Lbad_dest)
+ _ASM_EXTABLE_UA(20b, .Lfault)
.endm
- /*
- * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
- * potentially unmapped kernel address.
- */
- .macro ignore L=.Lignore
-30:
- _ASM_EXTABLE(30b, \L)
- .endm
-
-
SYM_FUNC_START(csum_partial_copy_generic)
- cmpl $3*64, %edx
- jle .Lignore
-
-.Lignore:
- subq $7*8, %rsp
- movq %rbx, 2*8(%rsp)
- movq %r12, 3*8(%rsp)
- movq %r14, 4*8(%rsp)
- movq %r13, 5*8(%rsp)
- movq %r15, 6*8(%rsp)
+ subq $5*8, %rsp
+ movq %rbx, 0*8(%rsp)
+ movq %r12, 1*8(%rsp)
+ movq %r14, 2*8(%rsp)
+ movq %r13, 3*8(%rsp)
+ movq %r15, 4*8(%rsp)
- movq %r8, (%rsp)
- movq %r9, 1*8(%rsp)
-
- movl %ecx, %eax
+ movl $-1, %eax
+ xorl %r9d, %r9d
movl %edx, %ecx
+ cmpl $8, %ecx
+ jb .Lshort
- xorl %r9d, %r9d
- movq %rcx, %r12
+ testb $7, %sil
+ jne .Lunaligned
+.Laligned:
+ movl %ecx, %r12d
shrq $6, %r12
jz .Lhandle_tail /* < 64 */
@@ -99,7 +84,12 @@ SYM_FUNC_START(csum_partial_copy_generic)
source
movq 56(%rdi), %r13
- ignore 2f
+30:
+ /*
+ * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+ * potentially unmapped kernel address.
+ */
+ _ASM_EXTABLE(30b, 2f)
prefetcht0 5*64(%rdi)
2:
adcq %rbx, %rax
@@ -131,8 +121,6 @@ SYM_FUNC_START(csum_partial_copy_generic)
dest
movq %r13, 56(%rsi)
-3:
-
leaq 64(%rdi), %rdi
leaq 64(%rsi), %rsi
@@ -142,8 +130,8 @@ SYM_FUNC_START(csum_partial_copy_generic)
/* do last up to 56 bytes */
.Lhandle_tail:
- /* ecx: count */
- movl %ecx, %r10d
+ /* ecx: count, rcx.63: the end result needs to be rol8 */
+ movq %rcx, %r10
andl $63, %ecx
shrl $3, %ecx
jz .Lfold
@@ -172,6 +160,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
.Lhandle_7:
movl %r10d, %ecx
andl $7, %ecx
+.L1: /* .Lshort rejoins the common path here */
shrl $1, %ecx
jz .Lhandle_1
movl $2, %edx
@@ -203,26 +192,65 @@ SYM_FUNC_START(csum_partial_copy_generic)
adcl %r9d, %eax /* carry */
.Lende:
- movq 2*8(%rsp), %rbx
- movq 3*8(%rsp), %r12
- movq 4*8(%rsp), %r14
- movq 5*8(%rsp), %r13
- movq 6*8(%rsp), %r15
- addq $7*8, %rsp
+ testq %r10, %r10
+ js .Lwas_odd
+.Lout:
+ movq 0*8(%rsp), %rbx
+ movq 1*8(%rsp), %r12
+ movq 2*8(%rsp), %r14
+ movq 3*8(%rsp), %r13
+ movq 4*8(%rsp), %r15
+ addq $5*8, %rsp
ret
+.Lshort:
+ movl %ecx, %r10d
+ jmp .L1
+.Lunaligned:
+ xorl %ebx, %ebx
+ testb $1, %sil
+ jne .Lodd
+1: testb $2, %sil
+ je 2f
+ source
+ movw (%rdi), %bx
+ dest
+ movw %bx, (%rsi)
+ leaq 2(%rdi), %rdi
+ subq $2, %rcx
+ leaq 2(%rsi), %rsi
+ addq %rbx, %rax
+2: testb $4, %sil
+ je .Laligned
+ source
+ movl (%rdi), %ebx
+ dest
+ movl %ebx, (%rsi)
+ leaq 4(%rdi), %rdi
+ subq $4, %rcx
+ leaq 4(%rsi), %rsi
+ addq %rbx, %rax
+ jmp .Laligned
+
+.Lodd:
+ source
+ movb (%rdi), %bl
+ dest
+ movb %bl, (%rsi)
+ leaq 1(%rdi), %rdi
+ leaq 1(%rsi), %rsi
+ /* decrement, set MSB */
+ leaq -1(%rcx, %rcx), %rcx
+ rorq $1, %rcx
+ shll $8, %ebx
+ addq %rbx, %rax
+ jmp 1b
+
+.Lwas_odd:
+ roll $8, %eax
+ jmp .Lout
- /* Exception handlers. Very simple, zeroing is done in the wrappers */
-.Lbad_source:
- movq (%rsp), %rax
- testq %rax, %rax
- jz .Lende
- movl $-EFAULT, (%rax)
- jmp .Lende
-
-.Lbad_dest:
- movq 8(%rsp), %rax
- testq %rax, %rax
- jz .Lende
- movl $-EFAULT, (%rax)
- jmp .Lende
+ /* Exception: just return 0 */
+.Lfault:
+ xorl %eax, %eax
+ jmp .Lout
SYM_FUNC_END(csum_partial_copy_generic)
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index ee63d7576fd2..189344924a2b 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -21,52 +21,16 @@
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_and_copy_from_user(const void __user *src, void *dst,
- int len, __wsum isum, int *errp)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
{
- might_sleep();
- *errp = 0;
+ __wsum sum;
+ might_sleep();
if (!user_access_begin(src, len))
- goto out_err;
-
- /*
- * Why 6, not 7? To handle odd addresses aligned we
- * would need to do considerable complications to fix the
- * checksum which is defined as an 16bit accumulator. The
- * fix alignment code is primarily for performance
- * compatibility with 32bit and that will handle odd
- * addresses slowly too.
- */
- if (unlikely((unsigned long)src & 6)) {
- while (((unsigned long)src & 6) && len >= 2) {
- __u16 val16;
-
- unsafe_get_user(val16, (const __u16 __user *)src, out);
-
- *(__u16 *)dst = val16;
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
- isum = csum_partial_copy_generic((__force const void *)src,
- dst, len, isum, errp, NULL);
- user_access_end();
- if (unlikely(*errp))
- goto out_err;
-
- return isum;
-
-out:
+ return 0;
+ sum = csum_partial_copy_generic((__force const void *)src, dst, len);
user_access_end();
-out_err:
- *errp = -EFAULT;
- memset(dst, 0, len);
-
- return isum;
+ return sum;
}
EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -82,40 +46,16 @@ EXPORT_SYMBOL(csum_and_copy_from_user);
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_and_copy_to_user(const void *src, void __user *dst,
- int len, __wsum isum, int *errp)
+csum_and_copy_to_user(const void *src, void __user *dst, int len)
{
- __wsum ret;
+ __wsum sum;
might_sleep();
-
- if (!user_access_begin(dst, len)) {
- *errp = -EFAULT;
+ if (!user_access_begin(dst, len))
return 0;
- }
-
- if (unlikely((unsigned long)dst & 6)) {
- while (((unsigned long)dst & 6) && len >= 2) {
- __u16 val16 = *(__u16 *)src;
-
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- unsafe_put_user(val16, (__u16 __user *)dst, out);
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
-
- *errp = 0;
- ret = csum_partial_copy_generic(src, (void __force *)dst,
- len, isum, NULL, errp);
- user_access_end();
- return ret;
-out:
+ sum = csum_partial_copy_generic(src, (void __force *)dst, len);
user_access_end();
- *errp = -EFAULT;
- return isum;
+ return sum;
}
EXPORT_SYMBOL(csum_and_copy_to_user);
@@ -129,9 +69,9 @@ EXPORT_SYMBOL(csum_and_copy_to_user);
* Returns an 32bit unfolded checksum of the buffer.
*/
__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
{
- return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+ return csum_partial_copy_generic(src, dst, len);
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index c8a85b512796..fa1bc2104b32 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -35,10 +35,21 @@
#include <asm/smap.h>
#include <asm/export.h>
+#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
+
+#ifdef CONFIG_X86_5LEVEL
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
+ __stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
+#else
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
+#endif
+
.text
SYM_FUNC_START(__get_user_1)
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ LOAD_TASK_SIZE_MINUS_N(0)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
@@ -51,15 +62,13 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2)
- add $1,%_ASM_AX
- jc bad_get_user
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ LOAD_TASK_SIZE_MINUS_N(1)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-2: movzwl -1(%_ASM_AX),%edx
+2: movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
@@ -67,15 +76,13 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4)
- add $3,%_ASM_AX
- jc bad_get_user
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ LOAD_TASK_SIZE_MINUS_N(3)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-3: movl -3(%_ASM_AX),%edx
+3: movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
ret
@@ -84,29 +91,25 @@ EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8)
#ifdef CONFIG_X86_64
- add $7,%_ASM_AX
- jc bad_get_user
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ LOAD_TASK_SIZE_MINUS_N(7)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-4: movq -7(%_ASM_AX),%rdx
+4: movq (%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
ret
#else
- add $7,%_ASM_AX
- jc bad_get_user_8
- mov PER_CPU_VAR(current_task), %_ASM_DX
- cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
+ LOAD_TASK_SIZE_MINUS_N(7)
+ cmp %_ASM_DX,%_ASM_AX
jae bad_get_user_8
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
-4: movl -7(%_ASM_AX),%edx
-5: movl -3(%_ASM_AX),%ecx
+4: movl (%_ASM_AX),%edx
+5: movl 4(%_ASM_AX),%ecx
xor %eax,%eax
ASM_CLAC
ret
@@ -114,6 +117,52 @@ SYM_FUNC_START(__get_user_8)
SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
+/* .. and the same for __get_user, just without the range checks */
+SYM_FUNC_START(__get_user_nocheck_1)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+6: movzbl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_1)
+EXPORT_SYMBOL(__get_user_nocheck_1)
+
+SYM_FUNC_START(__get_user_nocheck_2)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+7: movzwl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_2)
+EXPORT_SYMBOL(__get_user_nocheck_2)
+
+SYM_FUNC_START(__get_user_nocheck_4)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+8: movl (%_ASM_AX),%edx
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_4)
+EXPORT_SYMBOL(__get_user_nocheck_4)
+
+SYM_FUNC_START(__get_user_nocheck_8)
+ ASM_STAC
+ ASM_BARRIER_NOSPEC
+#ifdef CONFIG_X86_64
+9: movq (%_ASM_AX),%rdx
+#else
+9: movl (%_ASM_AX),%edx
+10: movl 4(%_ASM_AX),%ecx
+#endif
+ xor %eax,%eax
+ ASM_CLAC
+ ret
+SYM_FUNC_END(__get_user_nocheck_8)
+EXPORT_SYMBOL(__get_user_nocheck_8)
+
SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
ASM_CLAC
@@ -134,6 +183,7 @@ bad_get_user_8:
SYM_CODE_END(.Lbad_get_user_8_clac)
#endif
+/* get_user */
_ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_get_user_clac)
@@ -143,3 +193,14 @@ SYM_CODE_END(.Lbad_get_user_8_clac)
_ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac)
_ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac)
#endif
+
+/* __get_user */
+ _ASM_EXTABLE_UA(6b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(7b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(8b, .Lbad_get_user_clac)
+#ifdef CONFIG_X86_64
+ _ASM_EXTABLE_UA(9b, .Lbad_get_user_clac)
+#else
+ _ASM_EXTABLE_UA(9b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE_UA(10b, .Lbad_get_user_8_clac)
+#endif
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 5e69603ff63f..58f7fb95c7f4 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -20,6 +20,7 @@
enum reg_type {
REG_TYPE_RM = 0,
+ REG_TYPE_REG,
REG_TYPE_INDEX,
REG_TYPE_BASE,
};
@@ -53,6 +54,30 @@ static bool is_string_insn(struct insn *insn)
}
/**
+ * insn_has_rep_prefix() - Determine if instruction has a REP prefix
+ * @insn: Instruction containing the prefix to inspect
+ *
+ * Returns:
+ *
+ * true if the instruction has a REP prefix, false if not.
+ */
+bool insn_has_rep_prefix(struct insn *insn)
+{
+ int i;
+
+ insn_get_prefixes(insn);
+
+ for (i = 0; i < insn->prefixes.nbytes; i++) {
+ insn_byte_t p = insn->prefixes.bytes[i];
+
+ if (p == 0xf2 || p == 0xf3)
+ return true;
+ }
+
+ return false;
+}
+
+/**
* get_seg_reg_override_idx() - obtain segment register override index
* @insn: Valid instruction with segment override prefixes
*
@@ -439,6 +464,13 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
regno += 8;
break;
+ case REG_TYPE_REG:
+ regno = X86_MODRM_REG(insn->modrm.value);
+
+ if (X86_REX_R(insn->rex_prefix.value))
+ regno += 8;
+ break;
+
case REG_TYPE_INDEX:
regno = X86_SIB_INDEX(insn->sib.value);
if (X86_REX_X(insn->rex_prefix.value))
@@ -808,6 +840,21 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs)
}
/**
+ * insn_get_modrm_reg_off() - Obtain register in reg part of the ModRM byte
+ * @insn: Instruction containing the ModRM byte
+ * @regs: Register values as seen when entering kernel mode
+ *
+ * Returns:
+ *
+ * The register indicated by the reg part of the ModRM byte. The
+ * register is obtained as an offset from the base of pt_regs.
+ */
+int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs)
+{
+ return get_reg_offset(insn, regs, REG_TYPE_REG);
+}
+
+/**
* get_seg_base_limit() - obtain base address and limit of a segment
* @insn: Instruction. Must be valid.
* @regs: Register values as seen when entering kernel mode
@@ -1367,3 +1414,86 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
return (void __user *)-1L;
}
}
+
+/**
+ * insn_fetch_from_user() - Copy instruction bytes from user-space memory
+ * @regs: Structure with register values as seen when entering kernel mode
+ * @buf: Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
+{
+ unsigned long seg_base = 0;
+ int not_copied;
+
+ /*
+ * If not in user-space long mode, a custom code segment could be in
+ * use. This is true in protected mode (if the process defined a local
+ * descriptor table), or virtual-8086 mode. In most of the cases
+ * seg_base will be zero as in USER_CS.
+ */
+ if (!user_64bit_mode(regs)) {
+ seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+ if (seg_base == -1L)
+ return 0;
+ }
+
+
+ not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
+ MAX_INSN_SIZE);
+
+ return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_decode() - Decode an instruction
+ * @insn: Structure to store decoded instruction
+ * @regs: Structure with register values as seen when entering kernel mode
+ * @buf: Buffer containing the instruction bytes
+ * @buf_size: Number of instruction bytes available in buf
+ *
+ * Decodes the instruction provided in buf and stores the decoding results in
+ * insn. Also determines the correct address and operand sizes.
+ *
+ * Returns:
+ *
+ * True if instruction was decoded, False otherwise.
+ */
+bool insn_decode(struct insn *insn, struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE], int buf_size)
+{
+ int seg_defs;
+
+ insn_init(insn, buf, buf_size, user_64bit_mode(regs));
+
+ /*
+ * Override the default operand and address sizes with what is specified
+ * in the code segment descriptor. The instruction decoder only sets
+ * the address size it to either 4 or 8 address bytes and does nothing
+ * for the operand bytes. This OK for most of the cases, but we could
+ * have special cases where, for instance, a 16-bit code segment
+ * descriptor is used.
+ * If there is an address override prefix, the instruction decoder
+ * correctly updates these values, even for 16-bit defaults.
+ */
+ seg_defs = insn_get_code_seg_params(regs);
+ if (seg_defs == -EINVAL)
+ return false;
+
+ insn->addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
+ insn->opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
+
+ insn_get_length(insn);
+ if (buf_size < insn->length)
+ return false;
+
+ return true;
+}
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bbcc05bcefad..037faac46b0c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
.popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 7c7c92db8497..0ea344c5ea43 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -25,76 +25,87 @@
* Inputs: %eax[:%edx] contains the data
* %ecx contains the address
*
- * Outputs: %eax is error code (0 or -EFAULT)
+ * Outputs: %ecx is error code (0 or -EFAULT)
+ *
+ * Clobbers: %ebx needed for task pointer
*
* These functions should not modify any other registers,
* as they get called from within inline assembly.
*/
-#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX
+#ifdef CONFIG_X86_5LEVEL
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
+ __stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
+#else
+#define LOAD_TASK_SIZE_MINUS_N(n) \
+ mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
+#endif
.text
SYM_FUNC_START(__put_user_1)
- ENTER
- cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
+ LOAD_TASK_SIZE_MINUS_N(0)
+ cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
ASM_STAC
1: movb %al,(%_ASM_CX)
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
ret
SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
+EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2)
- ENTER
- mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
- sub $1,%_ASM_BX
+ LOAD_TASK_SIZE_MINUS_N(1)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
ASM_STAC
2: movw %ax,(%_ASM_CX)
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
ret
SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
+EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4)
- ENTER
- mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
- sub $3,%_ASM_BX
+ LOAD_TASK_SIZE_MINUS_N(3)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
ASM_STAC
3: movl %eax,(%_ASM_CX)
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
ret
SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
+EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8)
- ENTER
- mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
- sub $7,%_ASM_BX
+ LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
+SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
ASM_STAC
4: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
5: movl %edx,4(%_ASM_CX)
#endif
- xor %eax,%eax
+ xor %ecx,%ecx
ASM_CLAC
RET
SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
+EXPORT_SYMBOL(__put_user_nocheck_8)
SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
ASM_CLAC
.Lbad_put_user:
- movl $-EFAULT,%eax
+ movl $-EFAULT,%ecx
RET
SYM_CODE_END(.Lbad_put_user_clac)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b0dfac3d3df7..508c81e97ab1 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
}
EXPORT_SYMBOL(clear_user);
-/*
- * Similar to copy_user_handle_tail, probe for the write fault point,
- * but reuse __memcpy_mcsafe in case a new read error is encountered.
- * clac() is handled in _copy_to_iter_mcsafe().
- */
-__visible notrace unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
@@ -120,7 +99,7 @@ long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
*/
if (size < 8) {
if (!IS_ALIGNED(dest, 4) || size != 4)
- clean_cache_range(dst, 1);
+ clean_cache_range(dst, size);
} else {
if (!IS_ALIGNED(dest, 8)) {
dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 770b613790b3..f5e1e60c9095 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -21,7 +21,8 @@ DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
#endif
-struct cpu_entry_area *get_cpu_entry_area(int cpu)
+/* Is called from entry code, so must be noinstr */
+noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1d6cb07f4f86..b93d6cd08a7f 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -5,6 +5,7 @@
#include <xen/xen.h>
#include <asm/fpu/internal.h>
+#include <asm/sev-es.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
@@ -80,6 +81,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_uaccess);
+__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_copy);
+
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
@@ -125,17 +138,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);
-__visible bool ex_has_fault_handler(unsigned long ip)
+enum handler_type ex_get_fault_handler_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(ip);
if (!e)
- return false;
+ return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
-
- return handler == ex_handler_fault;
+ if (handler == ex_handler_fault)
+ return EX_HANDLER_FAULT;
+ else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
+ return EX_HANDLER_UACCESS;
+ else
+ return EX_HANDLER_OTHER;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6e3e8a124903..82bf37a5c9ec 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1128,7 +1128,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
return 0;
}
-static int fault_in_kernel_space(unsigned long address)
+bool fault_in_kernel_space(unsigned long address)
{
/*
* On 64-bit systems, the vsyscall page is at an address above
@@ -1446,11 +1446,14 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
prefetchw(&current->mm->mmap_lock);
/*
- * KVM has two types of events that are, logically, interrupts, but
- * are unfortunately delivered using the #PF vector. These events are
- * "you just accessed valid memory, but the host doesn't have it right
- * now, so I'll put you to sleep if you continue" and "that memory
- * you tried to access earlier is available now."
+ * KVM uses #PF vector to deliver 'page not present' events to guests
+ * (asynchronous page fault mechanism). The event happens when a
+ * userspace task is trying to access some valid (from guest's point of
+ * view) memory which is not currently mapped by the host (e.g. the
+ * memory is swapped out). Note, the corresponding "page ready" event
+ * which is injected when the memory becomes available, is delived via
+ * an interrupt mechanism and not a #PF exception
+ * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
*
* We are relying on the interrupted context being sane (valid RSP,
* relevant locks not held, etc.), which is fine as long as the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a4ac13cc3fdc..b5a3fa4033d3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -217,11 +217,6 @@ static void sync_global_pgds(unsigned long start, unsigned long end)
sync_global_pgds_l4(start, end);
}
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
-{
- sync_global_pgds(start, end);
-}
-
/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
@@ -1257,14 +1252,19 @@ static void __init preallocate_vmalloc_pages(void)
if (!p4d)
goto failed;
- /*
- * With 5-level paging the P4D level is not folded. So the PGDs
- * are now populated and there is no need to walk down to the
- * PUD level.
- */
if (pgtable_l5_enabled())
continue;
+ /*
+ * The goal here is to allocate all possibly required
+ * hardware page tables pointed to by the top hardware
+ * level.
+ *
+ * On 4-level systems, the P4D layer is folded away and
+ * the above code does no preallocation. Below, go down
+ * to the pud _software_ level to ensure the second
+ * hardware level is allocated on 4-level systems too.
+ */
lvl = "pud";
pud = pud_alloc(&init_mm, p4d, addr);
if (!pud)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 9f1177edc2e7..efbb3de472df 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -37,12 +37,13 @@
* reside in the .data section so as not to be zeroed out when the .bss
* section is later cleared.
*/
-u64 sme_me_mask __section(.data) = 0;
+u64 sme_me_mask __section(".data") = 0;
+u64 sev_status __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);
DEFINE_STATIC_KEY_FALSE(sev_enable_key);
EXPORT_SYMBOL_GPL(sev_enable_key);
-bool sev_enabled __section(.data);
+bool sev_enabled __section(".data");
/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
@@ -347,7 +348,13 @@ bool sme_active(void)
bool sev_active(void)
{
- return sme_me_mask && sev_enabled;
+ return sev_status & MSR_AMD64_SEV_ENABLED;
+}
+
+/* Needs to be called from non-instrumentable code */
+bool noinstr sev_es_active(void)
+{
+ return sev_status & MSR_AMD64_SEV_ES_ENABLED;
}
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
@@ -400,6 +407,31 @@ void __init mem_encrypt_free_decrypted_mem(void)
free_init_pages("unused decrypted", vaddr, vaddr_end);
}
+static void print_mem_encrypt_feature_info(void)
+{
+ pr_info("AMD Memory Encryption Features active:");
+
+ /* Secure Memory Encryption */
+ if (sme_active()) {
+ /*
+ * SME is mutually exclusive with any of the SEV
+ * features below.
+ */
+ pr_cont(" SME\n");
+ return;
+ }
+
+ /* Secure Encrypted Virtualization */
+ if (sev_active())
+ pr_cont(" SEV");
+
+ /* Encrypted Register State */
+ if (sev_es_active())
+ pr_cont(" SEV-ES");
+
+ pr_cont("\n");
+}
+
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
@@ -415,8 +447,6 @@ void __init mem_encrypt_init(void)
if (sev_active())
static_branch_enable(&sev_enable_key);
- pr_info("AMD %s active\n",
- sev_active() ? "Secure Encrypted Virtualization (SEV)"
- : "Secure Memory Encryption (SME)");
+ print_mem_encrypt_feature_info();
}
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index e2b0e2ac07bb..733b983f3a89 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -81,7 +81,7 @@ struct sme_populate_pgd_data {
* section is 2MB aligned to allow for simple pagetable setup using only
* PMD entries (see vmlinux.lds.S).
*/
-static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch);
+static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch");
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
@@ -540,6 +540,9 @@ void __init sme_enable(struct boot_params *bp)
if (!(msr & MSR_AMD64_SEV_ENABLED))
return;
+ /* Save SEV_STATUS to avoid reading MSR again */
+ sev_status = msr;
+
/* SEV state cannot be controlled by a command line option */
sme_me_mask = me_mask;
sev_enabled = true;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index aa76ec2d359b..44148691d78b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -37,14 +37,12 @@ static __init int numa_setup(char *opt)
return -EINVAL;
if (!strncmp(opt, "off", 3))
numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
if (!strncmp(opt, "fake=", 5))
- numa_emu_cmdline(opt + 5);
-#endif
-#ifdef CONFIG_ACPI_NUMA
+ return numa_emu_cmdline(opt + 5);
if (!strncmp(opt, "noacpi", 6))
- acpi_numa = -1;
-#endif
+ disable_srat();
+ if (!strncmp(opt, "nohmat", 6))
+ disable_hmat();
return 0;
}
early_param("numa", numa_setup);
@@ -516,7 +514,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
* memory ranges, because quirks such as trim_snb_memory()
* reserve specific pages for Sandy Bridge graphics. ]
*/
- for_each_memblock(reserved, mb_region) {
+ for_each_reserved_mem_region(mb_region) {
int nid = memblock_get_region_node(mb_region);
if (nid != MAX_NUMNODES)
@@ -748,6 +746,27 @@ static void __init init_memory_less_node(int nid)
}
/*
+ * A node may exist which has one or more Generic Initiators but no CPUs and no
+ * memory.
+ *
+ * This function must be called after init_cpu_to_node(), to ensure that any
+ * memoryless CPU nodes have already been brought online, and before the
+ * node_data[nid] is needed for zone list setup in build_all_zonelists().
+ *
+ * When this function is called, any nodes containing either memory and/or CPUs
+ * will already be online and there is no need to do anything extra, even if
+ * they also contain one or more Generic Initiators.
+ */
+void __init init_gi_nodes(void)
+{
+ int nid;
+
+ for_each_node_state(nid, N_GENERIC_INITIATOR)
+ if (!node_online(nid))
+ init_memory_less_node(nid);
+}
+
+/*
* Setup early cpu_to_node.
*
* Populate cpu_to_node[] only if x86_cpu_to_apicid[],
@@ -919,7 +938,6 @@ int phys_to_target_node(phys_addr_t start)
return meminfo_to_nid(&numa_reserved_meminfo, start);
}
-EXPORT_SYMBOL_GPL(phys_to_target_node);
int memory_add_physaddr_to_nid(u64 start)
{
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 683cd12f4793..87d77cc52f86 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -13,9 +13,10 @@
static int emu_nid_to_phys[MAX_NUMNODES];
static char *emu_cmdline __initdata;
-void __init numa_emu_cmdline(char *str)
+int __init numa_emu_cmdline(char *str)
{
emu_cmdline = str;
+ return 0;
}
static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index d1b2a889f035..40baa90e74f4 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
/*
* Before changing the encryption attribute, we need to flush caches.
*/
- cpa_flush(&cpa, 1);
+ cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
ret = __change_page_attr_set_clr(&cpa, 1);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0951b47e64c1..11666ba19b62 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,7 +14,6 @@
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
-#include <asm/uv/uv.h>
#include "mm_internal.h"
@@ -800,29 +799,6 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
trace_tlb_flush(TLB_REMOTE_SEND_IPI,
(info->end - info->start) >> PAGE_SHIFT);
- if (is_uv_system()) {
- /*
- * This whole special case is confused. UV has a "Broadcast
- * Assist Unit", which seems to be a fancy way to send IPIs.
- * Back when x86 used an explicit TLB flush IPI, UV was
- * optimized to use its own mechanism. These days, x86 uses
- * smp_call_function_many(), but UV still uses a manual IPI,
- * and that IPI's action is out of date -- it does a manual
- * flush instead of calling flush_tlb_func_remote(). This
- * means that the percpu tlb_gen variables won't be updated
- * and we'll do pointless flushes on future context switches.
- *
- * Rather than hooking native_flush_tlb_others() here, I think
- * that UV should be updated so that smp_call_function_many(),
- * etc, are optimal on UV.
- */
- cpumask = uv_flush_tlb_others(cpumask, info);
- if (cpumask)
- smp_call_function_many(cpumask, flush_tlb_func_remote,
- (void *)info, 1);
- return;
- }
-
/*
* If no page tables were freed, we can skip sending IPIs to
* CPUs in lazy TLB mode. They will flush the CPU themselves
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 42b6709e6dc7..796506dcfc42 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -221,14 +221,48 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
+/* Number of bytes that will be skipped on tailcall */
+#define X86_TAIL_CALL_OFFSET 11
-#define PROLOGUE_SIZE 25
+static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (callee_regs_used[0])
+ EMIT1(0x53); /* push rbx */
+ if (callee_regs_used[1])
+ EMIT2(0x41, 0x55); /* push r13 */
+ if (callee_regs_used[2])
+ EMIT2(0x41, 0x56); /* push r14 */
+ if (callee_regs_used[3])
+ EMIT2(0x41, 0x57); /* push r15 */
+ *pprog = prog;
+}
+
+static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (callee_regs_used[3])
+ EMIT2(0x41, 0x5F); /* pop r15 */
+ if (callee_regs_used[2])
+ EMIT2(0x41, 0x5E); /* pop r14 */
+ if (callee_regs_used[1])
+ EMIT2(0x41, 0x5D); /* pop r13 */
+ if (callee_regs_used[0])
+ EMIT1(0x5B); /* pop rbx */
+ *pprog = prog;
+}
/*
- * Emit x86-64 prologue code for BPF program and check its size.
- * bpf_tail_call helper will skip it while jumping into another program
+ * Emit x86-64 prologue code for BPF program.
+ * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
+ * while jumping to another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
+ bool tail_call_reachable, bool is_subprog)
{
u8 *prog = *pprog;
int cnt = X86_PATCH_SIZE;
@@ -238,19 +272,19 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*/
memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
prog += cnt;
+ if (!ebpf_from_cbpf) {
+ if (tail_call_reachable && !is_subprog)
+ EMIT2(0x31, 0xC0); /* xor eax, eax */
+ else
+ EMIT2(0x66, 0x90); /* nop2 */
+ }
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
- EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
- EMIT1(0x53); /* push rbx */
- EMIT2(0x41, 0x55); /* push r13 */
- EMIT2(0x41, 0x56); /* push r14 */
- EMIT2(0x41, 0x57); /* push r15 */
- if (!ebpf_from_cbpf) {
- /* zero init tail_call_cnt */
- EMIT2(0x6a, 0x00);
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
- }
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
+ if (tail_call_reachable)
+ EMIT1(0x50); /* push rax */
*pprog = prog;
}
@@ -314,13 +348,14 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
mutex_lock(&text_mutex);
if (memcmp(ip, old_insn, X86_PATCH_SIZE))
goto out;
+ ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
if (text_live)
text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
else
memcpy(ip, new_insn, X86_PATCH_SIZE);
+ ret = 0;
}
- ret = 0;
out:
mutex_unlock(&text_mutex);
return ret;
@@ -337,6 +372,22 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}
+static int get_pop_bytes(bool *callee_regs_used)
+{
+ int bytes = 0;
+
+ if (callee_regs_used[3])
+ bytes += 2;
+ if (callee_regs_used[2])
+ bytes += 2;
+ if (callee_regs_used[1])
+ bytes += 2;
+ if (callee_regs_used[0])
+ bytes += 1;
+
+ return bytes;
+}
+
/*
* Generate the following code:
*
@@ -351,12 +402,32 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call_indirect(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
+ u32 stack_depth)
{
+ int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
- int label1, label2, label3;
+ int pop_bytes = 0;
+ int off1 = 42;
+ int off2 = 31;
+ int off3 = 9;
int cnt = 0;
+ /* count the additional bytes used for popping callee regs from stack
+ * that need to be taken into account for each of the offsets that
+ * are used for bailing out of the tail call
+ */
+ pop_bytes = get_pop_bytes(callee_regs_used);
+ off1 += pop_bytes;
+ off2 += pop_bytes;
+ off3 += pop_bytes;
+
+ if (stack_depth) {
+ off1 += 7;
+ off2 += 7;
+ off3 += 7;
+ }
+
/*
* rdi - pointer to ctx
* rsi - pointer to bpf_array
@@ -370,72 +441,112 @@ static void emit_bpf_tail_call_indirect(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
+#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
- label1 = cnt;
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
+#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
- label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
/* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
+ EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
/*
* if (prog == NULL)
* goto out;
*/
- EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
+ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
+#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
- label3 = cnt;
- /* goto *(prog->bpf_func + prologue_size); */
- EMIT4(0x48, 0x8B, 0x40, /* mov rax, qword ptr [rax + 32] */
- offsetof(struct bpf_prog, bpf_func));
- EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
+ *pprog = prog;
+ pop_callee_regs(pprog, callee_regs_used);
+ prog = *pprog;
+
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
+ round_up(stack_depth, 8));
+ /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
+ EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */
+ offsetof(struct bpf_prog, bpf_func));
+ EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */
+ X86_TAIL_CALL_OFFSET);
/*
- * Wow we're ready to jump into next BPF program
+ * Now we're ready to jump into next BPF program
* rdi == ctx (1st arg)
- * rax == prog->bpf_func + prologue_size
+ * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- RETPOLINE_RAX_BPF_JIT();
+ RETPOLINE_RCX_BPF_JIT();
/* out: */
- BUILD_BUG_ON(cnt - label1 != OFFSET1);
- BUILD_BUG_ON(cnt - label2 != OFFSET2);
- BUILD_BUG_ON(cnt - label3 != OFFSET3);
*pprog = prog;
}
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
- u8 **pprog, int addr, u8 *image)
+ u8 **pprog, int addr, u8 *image,
+ bool *callee_regs_used, u32 stack_depth)
{
+ int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
+ int pop_bytes = 0;
+ int off1 = 20;
+ int poke_off;
int cnt = 0;
+ /* count the additional bytes used for popping callee regs to stack
+ * that need to be taken into account for jump offset that is used for
+ * bailing out from of the tail call when limit is reached
+ */
+ pop_bytes = get_pop_bytes(callee_regs_used);
+ off1 += pop_bytes;
+
+ /*
+ * total bytes for:
+ * - nop5/ jmpq $off
+ * - pop callee regs
+ * - sub rsp, $val if depth > 0
+ * - pop rax
+ */
+ poke_off = X86_PATCH_SIZE + pop_bytes + 1;
+ if (stack_depth) {
+ poke_off += 7;
+ off1 += 7;
+ }
+
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
- EMIT2(X86_JA, 14); /* ja out */
+ EMIT2(X86_JA, off1); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+ EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
- poke->ip = image + (addr - X86_PATCH_SIZE);
- poke->adj_off = PROLOGUE_SIZE;
+ poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+ poke->adj_off = X86_TAIL_CALL_OFFSET;
+ poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+ poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
+
+ emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
+ poke->tailcall_bypass);
+
+ *pprog = prog;
+ pop_callee_regs(pprog, callee_regs_used);
+ prog = *pprog;
+ EMIT1(0x58); /* pop rax */
+ if (stack_depth)
+ EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
@@ -453,7 +564,7 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
for (i = 0; i < prog->aux->size_poke_tab; i++) {
poke = &prog->aux->poke_tab[i];
- WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+ WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable));
if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
continue;
@@ -464,18 +575,25 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
if (target) {
/* Plain memcpy is used when image is not live yet
* and still not locked as read-only. Once poke
- * location is active (poke->ip_stable), any parallel
- * bpf_arch_text_poke() might occur still on the
- * read-write image until we finally locked it as
- * read-only. Both modifications on the given image
- * are under text_mutex to avoid interference.
+ * location is active (poke->tailcall_target_stable),
+ * any parallel bpf_arch_text_poke() might occur
+ * still on the read-write image until we finally
+ * locked it as read-only. Both modifications on
+ * the given image are under text_mutex to avoid
+ * interference.
*/
- ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
+ ret = __bpf_arch_text_poke(poke->tailcall_target,
+ BPF_MOD_JUMP, NULL,
(u8 *)target->bpf_func +
poke->adj_off, false);
BUG_ON(ret < 0);
+ ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ BPF_MOD_JUMP,
+ (u8 *)poke->tailcall_target +
+ X86_PATCH_SIZE, NULL, false);
+ BUG_ON(ret < 0);
}
- WRITE_ONCE(poke->ip_stable, true);
+ WRITE_ONCE(poke->tailcall_target_stable, true);
mutex_unlock(&array->aux->poke_mutex);
}
}
@@ -652,19 +770,49 @@ static bool ex_handler_bpf(const struct exception_table_entry *x,
return true;
}
+static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
+ bool *regs_used, bool *tail_call_seen)
+{
+ int i;
+
+ for (i = 1; i <= insn_cnt; i++, insn++) {
+ if (insn->code == (BPF_JMP | BPF_TAIL_CALL))
+ *tail_call_seen = true;
+ if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
+ regs_used[0] = true;
+ if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
+ regs_used[1] = true;
+ if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
+ regs_used[2] = true;
+ if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
+ regs_used[3] = true;
+ }
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
+ bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
struct bpf_insn *insn = bpf_prog->insnsi;
+ bool callee_regs_used[4] = {};
int insn_cnt = bpf_prog->len;
+ bool tail_call_seen = false;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
+ detect_reg_usage(insn, insn_cnt, callee_regs_used,
+ &tail_call_seen);
+
+ /* tail call's presence in current prog implies it is reachable */
+ tail_call_reachable |= tail_call_seen;
+
emit_prologue(&prog, bpf_prog->aux->stack_depth,
- bpf_prog_was_classic(bpf_prog));
+ bpf_prog_was_classic(bpf_prog), tail_call_reachable,
+ bpf_prog->aux->func_idx != 0);
+ push_callee_regs(&prog, callee_regs_used);
addrs[0] = prog - temp;
for (i = 1; i <= insn_cnt; i++, insn++) {
@@ -1102,16 +1250,27 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
- return -EINVAL;
+ if (tail_call_reachable) {
+ EMIT3_off32(0x48, 0x8B, 0x85,
+ -(bpf_prog->aux->stack_depth + 8));
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
+ return -EINVAL;
+ } else {
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
+ return -EINVAL;
+ }
break;
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
- &prog, addrs[i], image);
+ &prog, addrs[i], image,
+ callee_regs_used,
+ bpf_prog->aux->stack_depth);
else
- emit_bpf_tail_call_indirect(&prog);
+ emit_bpf_tail_call_indirect(&prog,
+ callee_regs_used,
+ bpf_prog->aux->stack_depth);
break;
/* cond jump */
@@ -1294,12 +1453,7 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
- if (!bpf_prog_was_classic(bpf_prog))
- EMIT1(0x5B); /* get rid of tail_call_cnt */
- EMIT2(0x41, 0x5F); /* pop r15 */
- EMIT2(0x41, 0x5E); /* pop r14 */
- EMIT2(0x41, 0x5D); /* pop r13 */
- EMIT1(0x5B); /* pop rbx */
+ pop_callee_regs(&prog, callee_regs_used);
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
@@ -1379,10 +1533,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
u8 *prog = *pprog;
int cnt = 0;
- if (emit_call(&prog, __bpf_prog_enter, prog))
- return -EINVAL;
- /* remember prog start time returned by __bpf_prog_enter */
- emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+ if (p->aux->sleepable) {
+ if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
+ return -EINVAL;
+ } else {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+ }
/* arg1: lea rdi, [rbp - stack_size] */
EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1402,13 +1561,18 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (mod_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- /* arg1: mov rdi, progs[i] */
- emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
- (u32) (long) p);
- /* arg2: mov rsi, rbx <- start time in nsec */
- emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
- if (emit_call(&prog, __bpf_prog_exit, prog))
- return -EINVAL;
+ if (p->aux->sleepable) {
+ if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
+ return -EINVAL;
+ } else {
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
+ (u32) (long) p);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
*pprog = prog;
return 0;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index df1d95913d4e..3507f456fcd0 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pci_x86.h>
#include <asm/setup.h>
+#include <asm/irqdomain.h>
unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
PCI_PROBE_MMCONF;
@@ -633,8 +634,9 @@ static void set_dev_domain_options(struct pci_dev *pdev)
int pcibios_add_device(struct pci_dev *dev)
{
- struct setup_data *data;
struct pci_setup_rom *rom;
+ struct irq_domain *msidom;
+ struct setup_data *data;
u64 pa_data;
pa_data = boot_params.hdr.setup_data;
@@ -661,6 +663,20 @@ int pcibios_add_device(struct pci_dev *dev)
memunmap(data);
}
set_dev_domain_options(dev);
+
+ /*
+ * Setup the initial MSI domain of the device. If the underlying
+ * bus has a PCI/MSI irqdomain associated use the bus domain,
+ * otherwise set the default domain. This ensures that special irq
+ * domains e.g. VMD are preserved. The default ensures initial
+ * operation if irq remapping is not active. If irq remapping is
+ * active it will overwrite the domain pointer when the device is
+ * associated to a remapping domain.
+ */
+ msidom = dev_get_msi_domain(&dev->bus->dev);
+ if (!msidom)
+ msidom = x86_pci_msi_default_domain;
+ dev_set_msi_domain(&dev->dev, msidom);
return 0;
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b8c9a5b87f37..0a0e168be1cb 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -587,7 +587,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0xa26d, pci_invalid_bar);
static void pci_fixup_amd_ehci_pme(struct pci_dev *dev)
{
dev_info(&dev->dev, "PME# does not work under D3, disabling it\n");
- dev->pme_support &= ~((PCI_PM_CAP_PME_D3 | PCI_PM_CAP_PME_D3cold)
+ dev->pme_support &= ~((PCI_PM_CAP_PME_D3hot | PCI_PM_CAP_PME_D3cold)
>> PCI_PM_CAP_PME_SHIFT);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme);
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index 5fc617edf108..00bfa1ebad6c 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -3,16 +3,17 @@
#include <linux/init.h>
#include <asm/pci_x86.h>
#include <asm/x86_init.h>
+#include <asm/irqdomain.h>
/* arch_initcall has too random ordering, so call the initializers
in the right sequence from here. */
static __init int pci_arch_init(void)
{
-#ifdef CONFIG_PCI_DIRECT
- int type = 0;
+ int type;
+
+ x86_create_pci_msi_domain();
type = pci_direct_probe();
-#endif
if (!(pci_probe & PCI_PROBE_NOEARLY))
pci_mmcfg_early_init();
@@ -20,18 +21,16 @@ static __init int pci_arch_init(void)
if (x86_init.pci.arch_init && !x86_init.pci.arch_init())
return 0;
-#ifdef CONFIG_PCI_BIOS
pci_pcbios_init();
-#endif
+
/*
* don't check for raw_pci_ops here because we want pcbios as last
* fallback, yet it's needed to run first to set pcibios_last_bus
* in case legacy PCI probing is used. otherwise detecting peer busses
* fails.
*/
-#ifdef CONFIG_PCI_DIRECT
pci_direct_init(type);
-#endif
+
if (!raw_pci_ops && !raw_pci_ext_ops)
printk(KERN_ERR
"PCI: Fatal: No config space access function found\n");
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 00c62115f39c..24ca4ee2802f 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -33,6 +33,7 @@
#include <asm/hw_irq.h>
#include <asm/io_apic.h>
#include <asm/intel-mid.h>
+#include <asm/acpi.h>
#define PCIE_CAP_OFFSET 0x100
@@ -322,7 +323,7 @@ static void pci_d3delay_fixup(struct pci_dev *dev)
*/
if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID))
return;
- dev->d3_delay = 0;
+ dev->d3hot_delay = 0;
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index c313d784efab..5701d5ba3df4 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -15,7 +15,6 @@
#include <asm/iommu.h>
#define STA2X11_SWIOTLB_SIZE (4*1024*1024)
-extern int swiotlb_late_init_with_default_size(size_t default_size);
/*
* We build a list of bus numbers that are under the ConneXt. The
@@ -133,7 +132,7 @@ static void sta2x11_map_ep(struct pci_dev *pdev)
struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev);
struct device *dev = &pdev->dev;
u32 amba_base, max_amba_addr;
- int i;
+ int i, ret;
if (!instance)
return;
@@ -141,7 +140,9 @@ static void sta2x11_map_ep(struct pci_dev *pdev)
pci_read_config_dword(pdev, AHB_BASE(0), &amba_base);
max_amba_addr = amba_base + STA2X11_AMBA_SIZE - 1;
- dev->dma_pfn_offset = PFN_DOWN(-amba_base);
+ ret = dma_direct_set_offset(dev, 0, amba_base, STA2X11_AMBA_SIZE);
+ if (ret)
+ dev_err(dev, "sta2x11: could not set DMA offset\n");
dev->bus_dma_limit = max_amba_addr;
pci_set_consistent_dma_mask(pdev, max_amba_addr);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 89395a5049bb..c552cd2d0632 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -157,6 +157,13 @@ static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
struct xen_pci_frontend_ops *xen_pci_frontend;
EXPORT_SYMBOL_GPL(xen_pci_frontend);
+struct xen_msi_ops {
+ int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
+ void (*teardown_msi_irqs)(struct pci_dev *dev);
+};
+
+static struct xen_msi_ops xen_msi_ops __ro_after_init;
+
static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
int irq, ret, i;
@@ -372,28 +379,122 @@ static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
}
}
-#endif
+#else /* CONFIG_XEN_DOM0 */
+#define xen_initdom_setup_msi_irqs NULL
+#define xen_initdom_restore_msi_irqs NULL
+#endif /* !CONFIG_XEN_DOM0 */
static void xen_teardown_msi_irqs(struct pci_dev *dev)
{
struct msi_desc *msidesc;
+ int i;
+
+ for_each_pci_msi_entry(msidesc, dev) {
+ if (msidesc->irq) {
+ for (i = 0; i < msidesc->nvec_used; i++)
+ xen_destroy_irq(msidesc->irq + i);
+ }
+ }
+}
+
+static void xen_pv_teardown_msi_irqs(struct pci_dev *dev)
+{
+ struct msi_desc *msidesc = first_pci_msi_entry(dev);
- msidesc = first_pci_msi_entry(dev);
if (msidesc->msi_attrib.is_msix)
xen_pci_frontend_disable_msix(dev);
else
xen_pci_frontend_disable_msi(dev);
- /* Free the IRQ's and the msidesc using the generic code. */
- default_teardown_msi_irqs(dev);
+ xen_teardown_msi_irqs(dev);
}
-static void xen_teardown_msi_irq(unsigned int irq)
+static int xen_msi_domain_alloc_irqs(struct irq_domain *domain,
+ struct device *dev, int nvec)
{
- xen_destroy_irq(irq);
+ int type;
+
+ if (WARN_ON_ONCE(!dev_is_pci(dev)))
+ return -EINVAL;
+
+ if (first_msi_entry(dev)->msi_attrib.is_msix)
+ type = PCI_CAP_ID_MSIX;
+ else
+ type = PCI_CAP_ID_MSI;
+
+ return xen_msi_ops.setup_msi_irqs(to_pci_dev(dev), nvec, type);
}
-#endif
+static void xen_msi_domain_free_irqs(struct irq_domain *domain,
+ struct device *dev)
+{
+ if (WARN_ON_ONCE(!dev_is_pci(dev)))
+ return;
+
+ xen_msi_ops.teardown_msi_irqs(to_pci_dev(dev));
+}
+
+static struct msi_domain_ops xen_pci_msi_domain_ops = {
+ .domain_alloc_irqs = xen_msi_domain_alloc_irqs,
+ .domain_free_irqs = xen_msi_domain_free_irqs,
+};
+
+static struct msi_domain_info xen_pci_msi_domain_info = {
+ .ops = &xen_pci_msi_domain_ops,
+};
+
+/*
+ * This irq domain is a blatant violation of the irq domain design, but
+ * distangling XEN into real irq domains is not a job for mere mortals with
+ * limited XENology. But it's the least dangerous way for a mere mortal to
+ * get rid of the arch_*_msi_irqs() hackery in order to store the irq
+ * domain pointer in struct device. This irq domain wrappery allows to do
+ * that without breaking XEN terminally.
+ */
+static __init struct irq_domain *xen_create_pci_msi_domain(void)
+{
+ struct irq_domain *d = NULL;
+ struct fwnode_handle *fn;
+
+ fn = irq_domain_alloc_named_fwnode("XEN-MSI");
+ if (fn)
+ d = msi_create_irq_domain(fn, &xen_pci_msi_domain_info, NULL);
+
+ /* FIXME: No idea how to survive if this fails */
+ BUG_ON(!d);
+
+ return d;
+}
+
+static __init void xen_setup_pci_msi(void)
+{
+ if (xen_pv_domain()) {
+ if (xen_initial_domain()) {
+ xen_msi_ops.setup_msi_irqs = xen_initdom_setup_msi_irqs;
+ x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
+ } else {
+ xen_msi_ops.setup_msi_irqs = xen_setup_msi_irqs;
+ }
+ xen_msi_ops.teardown_msi_irqs = xen_pv_teardown_msi_irqs;
+ pci_msi_ignore_mask = 1;
+ } else if (xen_hvm_domain()) {
+ xen_msi_ops.setup_msi_irqs = xen_hvm_setup_msi_irqs;
+ xen_msi_ops.teardown_msi_irqs = xen_teardown_msi_irqs;
+ } else {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ /*
+ * Override the PCI/MSI irq domain init function. No point
+ * in allocating the native domain and never use it.
+ */
+ x86_init.irqs.create_pci_msi_domain = xen_create_pci_msi_domain;
+}
+
+#else /* CONFIG_PCI_MSI */
+static inline void xen_setup_pci_msi(void) { }
+#endif /* CONFIG_PCI_MSI */
int __init pci_xen_init(void)
{
@@ -410,17 +511,12 @@ int __init pci_xen_init(void)
/* Keep ACPI out of the picture */
acpi_noirq_set();
-#ifdef CONFIG_PCI_MSI
- x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
- x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
- x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
- pci_msi_ignore_mask = 1;
-#endif
+ xen_setup_pci_msi();
return 0;
}
#ifdef CONFIG_PCI_MSI
-void __init xen_msi_init(void)
+static void __init xen_hvm_msi_init(void)
{
if (!disable_apic) {
/*
@@ -435,9 +531,7 @@ void __init xen_msi_init(void)
((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
return;
}
-
- x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
- x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ xen_setup_pci_msi();
}
#endif
@@ -460,7 +554,7 @@ int __init pci_xen_hvm_init(void)
* We need to wait until after x2apic is initialized
* before we can set MSI IRQ ops.
*/
- x86_platform.apic_post_init = xen_msi_init;
+ x86_platform.apic_post_init = xen_hvm_msi_init;
#endif
return 0;
}
@@ -470,12 +564,7 @@ int __init pci_xen_initial_domain(void)
{
int irq;
-#ifdef CONFIG_PCI_MSI
- x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
- x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
- x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
- pci_msi_ignore_mask = 1;
-#endif
+ xen_setup_pci_msi();
__acpi_register_gsi = acpi_register_gsi_xen;
__acpi_unregister_gsi = NULL;
/*
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index d37ebe6e70d7..8a26e705cb06 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -90,6 +90,9 @@ static const unsigned long * const efi_tables[] = {
&efi.tpm_log,
&efi.tpm_final_log,
&efi_rng_seed,
+#ifdef CONFIG_LOAD_UEFI_KEYS
+ &efi.mokvar_table,
+#endif
};
u64 efi_setup; /* efi setup_data physical address */
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6af4da1149ba..8f5759df7776 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -47,6 +47,7 @@
#include <asm/realmode.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
+#include <asm/sev-es.h>
/*
* We allocate runtime services regions top-down, starting from -4G, i.e.
@@ -230,6 +231,15 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
}
/*
+ * When SEV-ES is active, the GHCB as set by the kernel will be used
+ * by firmware. Create a 1:1 unencrypted mapping for each GHCB.
+ */
+ if (sev_es_efi_map_ghcbs(pgd)) {
+ pr_err("Failed to create 1:1 mapping for the GHCBs!\n");
+ return 1;
+ }
+
+ /*
* When making calls to the firmware everything needs to be 1:1
* mapped and addressable with 32-bit pointers. Map the kernel
* text and allocate a new stack because we can't rely on the
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index c0a502f7e3a7..9ac7457f52a3 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -19,8 +19,8 @@
* pvh_bootparams and pvh_start_info need to live in the data segment since
* they are used after startup_{32|64}, which clear .bss, are invoked.
*/
-struct boot_params pvh_bootparams __attribute__((section(".data")));
-struct hvm_start_info pvh_start_info __attribute__((section(".data")));
+struct boot_params pvh_bootparams __section(".data");
+struct hvm_start_info pvh_start_info __section(".data");
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
index a3693c829e2e..224ff0504890 100644
--- a/arch/x86/platform/uv/Makefile
+++ b/arch/x86/platform/uv/Makefile
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o
+obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index a2f447dffea6..54511eaccf4d 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -2,8 +2,9 @@
/*
* BIOS run time interface routines.
*
- * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Russ Anderson <rja@sgi.com>
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
*/
#include <linux/efi.h>
@@ -170,16 +171,27 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
(u64)decode, (u64)domain, (u64)bus, 0, 0);
}
-int uv_bios_init(void)
+unsigned long get_uv_systab_phys(bool msg)
{
- uv_systab = NULL;
if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
!uv_systab_phys || efi_runtime_disabled()) {
- pr_crit("UV: UVsystab: missing\n");
- return -EEXIST;
+ if (msg)
+ pr_crit("UV: UVsystab: missing\n");
+ return 0;
}
+ return uv_systab_phys;
+}
+
+int uv_bios_init(void)
+{
+ unsigned long uv_systab_phys_addr;
+
+ uv_systab = NULL;
+ uv_systab_phys_addr = get_uv_systab_phys(1);
+ if (!uv_systab_phys_addr)
+ return -EEXIST;
- uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab));
+ uv_systab = ioremap(uv_systab_phys_addr, sizeof(struct uv_systab));
if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
pr_err("UV: UVsystab: bad signature!\n");
iounmap(uv_systab);
@@ -191,7 +203,7 @@ int uv_bios_init(void)
int size = uv_systab->size;
iounmap(uv_systab);
- uv_systab = ioremap(uv_systab_phys, size);
+ uv_systab = ioremap(uv_systab_phys_addr, size);
if (!uv_systab) {
pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
return -EFAULT;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
deleted file mode 100644
index 62ea907668f8..000000000000
--- a/arch/x86/platform/uv/tlb_uv.c
+++ /dev/null
@@ -1,2097 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SGI UltraViolet TLB flush routines.
- *
- * (c) 2008-2014 Cliff Wickman <cpw@sgi.com>, SGI.
- */
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-
-#include <asm/mmu_context.h>
-#include <asm/uv/uv.h>
-#include <asm/uv/uv_mmrs.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
-#include <asm/apic.h>
-#include <asm/tsc.h>
-#include <asm/irq_vectors.h>
-#include <asm/timer.h>
-
-static struct bau_operations ops __ro_after_init;
-
-static int timeout_us;
-static bool nobau = true;
-static int nobau_perm;
-
-/* tunables: */
-static int max_concurr = MAX_BAU_CONCURRENT;
-static int max_concurr_const = MAX_BAU_CONCURRENT;
-static int plugged_delay = PLUGGED_DELAY;
-static int plugsb4reset = PLUGSB4RESET;
-static int giveup_limit = GIVEUP_LIMIT;
-static int timeoutsb4reset = TIMEOUTSB4RESET;
-static int ipi_reset_limit = IPI_RESET_LIMIT;
-static int complete_threshold = COMPLETE_THRESHOLD;
-static int congested_respns_us = CONGESTED_RESPONSE_US;
-static int congested_reps = CONGESTED_REPS;
-static int disabled_period = DISABLED_PERIOD;
-
-static struct tunables tunables[] = {
- {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
- {&plugged_delay, PLUGGED_DELAY},
- {&plugsb4reset, PLUGSB4RESET},
- {&timeoutsb4reset, TIMEOUTSB4RESET},
- {&ipi_reset_limit, IPI_RESET_LIMIT},
- {&complete_threshold, COMPLETE_THRESHOLD},
- {&congested_respns_us, CONGESTED_RESPONSE_US},
- {&congested_reps, CONGESTED_REPS},
- {&disabled_period, DISABLED_PERIOD},
- {&giveup_limit, GIVEUP_LIMIT}
-};
-
-static struct dentry *tunables_dir;
-
-/* these correspond to the statistics printed by ptc_seq_show() */
-static char *stat_description[] = {
- "sent: number of shootdown messages sent",
- "stime: time spent sending messages",
- "numuvhubs: number of hubs targeted with shootdown",
- "numuvhubs16: number times 16 or more hubs targeted",
- "numuvhubs8: number times 8 or more hubs targeted",
- "numuvhubs4: number times 4 or more hubs targeted",
- "numuvhubs2: number times 2 or more hubs targeted",
- "numuvhubs1: number times 1 hub targeted",
- "numcpus: number of cpus targeted with shootdown",
- "dto: number of destination timeouts",
- "retries: destination timeout retries sent",
- "rok: : destination timeouts successfully retried",
- "resetp: ipi-style resource resets for plugs",
- "resett: ipi-style resource resets for timeouts",
- "giveup: fall-backs to ipi-style shootdowns",
- "sto: number of source timeouts",
- "bz: number of stay-busy's",
- "throt: number times spun in throttle",
- "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
- "recv: shootdown messages received",
- "rtime: time spent processing messages",
- "all: shootdown all-tlb messages",
- "one: shootdown one-tlb messages",
- "mult: interrupts that found multiple messages",
- "none: interrupts that found no messages",
- "retry: number of retry messages processed",
- "canc: number messages canceled by retries",
- "nocan: number retries that found nothing to cancel",
- "reset: number of ipi-style reset requests processed",
- "rcan: number messages canceled by reset requests",
- "disable: number times use of the BAU was disabled",
- "enable: number times use of the BAU was re-enabled"
-};
-
-static int __init setup_bau(char *arg)
-{
- int result;
-
- if (!arg)
- return -EINVAL;
-
- result = strtobool(arg, &nobau);
- if (result)
- return result;
-
- /* we need to flip the logic here, so that bau=y sets nobau to false */
- nobau = !nobau;
-
- if (!nobau)
- pr_info("UV BAU Enabled\n");
- else
- pr_info("UV BAU Disabled\n");
-
- return 0;
-}
-early_param("bau", setup_bau);
-
-/* base pnode in this partition */
-static int uv_base_pnode __read_mostly;
-
-static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
-static DEFINE_PER_CPU(struct bau_control, bau_control);
-static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
-
-static void
-set_bau_on(void)
-{
- int cpu;
- struct bau_control *bcp;
-
- if (nobau_perm) {
- pr_info("BAU not initialized; cannot be turned on\n");
- return;
- }
- nobau = false;
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->nobau = false;
- }
- pr_info("BAU turned on\n");
- return;
-}
-
-static void
-set_bau_off(void)
-{
- int cpu;
- struct bau_control *bcp;
-
- nobau = true;
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->nobau = true;
- }
- pr_info("BAU turned off\n");
- return;
-}
-
-/*
- * Determine the first node on a uvhub. 'Nodes' are used for kernel
- * memory allocation.
- */
-static int __init uvhub_to_first_node(int uvhub)
-{
- int node, b;
-
- for_each_online_node(node) {
- b = uv_node_to_blade_id(node);
- if (uvhub == b)
- return node;
- }
- return -1;
-}
-
-/*
- * Determine the apicid of the first cpu on a uvhub.
- */
-static int __init uvhub_to_first_apicid(int uvhub)
-{
- int cpu;
-
- for_each_present_cpu(cpu)
- if (uvhub == uv_cpu_to_blade_id(cpu))
- return per_cpu(x86_cpu_to_apicid, cpu);
- return -1;
-}
-
-/*
- * Free a software acknowledge hardware resource by clearing its Pending
- * bit. This will return a reply to the sender.
- * If the message has timed out, a reply has already been sent by the
- * hardware but the resource has not been released. In that case our
- * clear of the Timeout bit (as well) will free the resource. No reply will
- * be sent (the hardware will only do one reply per message).
- */
-static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
- int do_acknowledge)
-{
- unsigned long dw;
- struct bau_pq_entry *msg;
-
- msg = mdp->msg;
- if (!msg->canceled && do_acknowledge) {
- dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
- ops.write_l_sw_ack(dw);
- }
- msg->replied_to = 1;
- msg->swack_vec = 0;
-}
-
-/*
- * Process the receipt of a RETRY message
- */
-static void bau_process_retry_msg(struct msg_desc *mdp,
- struct bau_control *bcp)
-{
- int i;
- int cancel_count = 0;
- unsigned long msg_res;
- unsigned long mmr = 0;
- struct bau_pq_entry *msg = mdp->msg;
- struct bau_pq_entry *msg2;
- struct ptc_stats *stat = bcp->statp;
-
- stat->d_retries++;
- /*
- * cancel any message from msg+1 to the retry itself
- */
- for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
- if (msg2 > mdp->queue_last)
- msg2 = mdp->queue_first;
- if (msg2 == msg)
- break;
-
- /* same conditions for cancellation as do_reset */
- if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
- (msg2->swack_vec) && ((msg2->swack_vec &
- msg->swack_vec) == 0) &&
- (msg2->sending_cpu == msg->sending_cpu) &&
- (msg2->msg_type != MSG_NOOP)) {
- mmr = ops.read_l_sw_ack();
- msg_res = msg2->swack_vec;
- /*
- * This is a message retry; clear the resources held
- * by the previous message only if they timed out.
- * If it has not timed out we have an unexpected
- * situation to report.
- */
- if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
- unsigned long mr;
- /*
- * Is the resource timed out?
- * Make everyone ignore the cancelled message.
- */
- msg2->canceled = 1;
- stat->d_canceled++;
- cancel_count++;
- mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
- ops.write_l_sw_ack(mr);
- }
- }
- }
- if (!cancel_count)
- stat->d_nocanceled++;
-}
-
-/*
- * Do all the things a cpu should do for a TLB shootdown message.
- * Other cpu's may come here at the same time for this message.
- */
-static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
- int do_acknowledge)
-{
- short socket_ack_count = 0;
- short *sp;
- struct atomic_short *asp;
- struct ptc_stats *stat = bcp->statp;
- struct bau_pq_entry *msg = mdp->msg;
- struct bau_control *smaster = bcp->socket_master;
-
- /*
- * This must be a normal message, or retry of a normal message
- */
- if (msg->address == TLB_FLUSH_ALL) {
- flush_tlb_local();
- stat->d_alltlb++;
- } else {
- flush_tlb_one_user(msg->address);
- stat->d_onetlb++;
- }
- stat->d_requestee++;
-
- /*
- * One cpu on each uvhub has the additional job on a RETRY
- * of releasing the resource held by the message that is
- * being retried. That message is identified by sending
- * cpu number.
- */
- if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
- bau_process_retry_msg(mdp, bcp);
-
- /*
- * This is a swack message, so we have to reply to it.
- * Count each responding cpu on the socket. This avoids
- * pinging the count's cache line back and forth between
- * the sockets.
- */
- sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
- asp = (struct atomic_short *)sp;
- socket_ack_count = atom_asr(1, asp);
- if (socket_ack_count == bcp->cpus_in_socket) {
- int msg_ack_count;
- /*
- * Both sockets dump their completed count total into
- * the message's count.
- */
- *sp = 0;
- asp = (struct atomic_short *)&msg->acknowledge_count;
- msg_ack_count = atom_asr(socket_ack_count, asp);
-
- if (msg_ack_count == bcp->cpus_in_uvhub) {
- /*
- * All cpus in uvhub saw it; reply
- * (unless we are in the UV2 workaround)
- */
- reply_to_message(mdp, bcp, do_acknowledge);
- }
- }
-
- return;
-}
-
-/*
- * Determine the first cpu on a pnode.
- */
-static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
-{
- int cpu;
- struct hub_and_pnode *hpp;
-
- for_each_present_cpu(cpu) {
- hpp = &smaster->thp[cpu];
- if (pnode == hpp->pnode)
- return cpu;
- }
- return -1;
-}
-
-/*
- * Last resort when we get a large number of destination timeouts is
- * to clear resources held by a given cpu.
- * Do this with IPI so that all messages in the BAU message queue
- * can be identified by their nonzero swack_vec field.
- *
- * This is entered for a single cpu on the uvhub.
- * The sender want's this uvhub to free a specific message's
- * swack resources.
- */
-static void do_reset(void *ptr)
-{
- int i;
- struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
- struct reset_args *rap = (struct reset_args *)ptr;
- struct bau_pq_entry *msg;
- struct ptc_stats *stat = bcp->statp;
-
- stat->d_resets++;
- /*
- * We're looking for the given sender, and
- * will free its swack resource.
- * If all cpu's finally responded after the timeout, its
- * message 'replied_to' was set.
- */
- for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
- unsigned long msg_res;
- /* do_reset: same conditions for cancellation as
- bau_process_retry_msg() */
- if ((msg->replied_to == 0) &&
- (msg->canceled == 0) &&
- (msg->sending_cpu == rap->sender) &&
- (msg->swack_vec) &&
- (msg->msg_type != MSG_NOOP)) {
- unsigned long mmr;
- unsigned long mr;
- /*
- * make everyone else ignore this message
- */
- msg->canceled = 1;
- /*
- * only reset the resource if it is still pending
- */
- mmr = ops.read_l_sw_ack();
- msg_res = msg->swack_vec;
- mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
- if (mmr & msg_res) {
- stat->d_rcanceled++;
- ops.write_l_sw_ack(mr);
- }
- }
- }
- return;
-}
-
-/*
- * Use IPI to get all target uvhubs to release resources held by
- * a given sending cpu number.
- */
-static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
-{
- int pnode;
- int apnode;
- int maskbits;
- int sender = bcp->cpu;
- cpumask_t *mask = bcp->uvhub_master->cpumask;
- struct bau_control *smaster = bcp->socket_master;
- struct reset_args reset_args;
-
- reset_args.sender = sender;
- cpumask_clear(mask);
- /* find a single cpu for each uvhub in this distribution mask */
- maskbits = sizeof(struct pnmask) * BITSPERBYTE;
- /* each bit is a pnode relative to the partition base pnode */
- for (pnode = 0; pnode < maskbits; pnode++) {
- int cpu;
- if (!bau_uvhub_isset(pnode, distribution))
- continue;
- apnode = pnode + bcp->partition_base_pnode;
- cpu = pnode_to_first_cpu(apnode, smaster);
- cpumask_set_cpu(cpu, mask);
- }
-
- /* IPI all cpus; preemption is already disabled */
- smp_call_function_many(mask, do_reset, (void *)&reset_args, 1);
- return;
-}
-
-/*
- * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative
- * number, not an absolute. It converts a duration in cycles to a duration in
- * ns.
- */
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- struct cyc2ns_data data;
- unsigned long long ns;
-
- cyc2ns_read_begin(&data);
- ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
- cyc2ns_read_end();
-
- return ns;
-}
-
-/*
- * The reverse of the above; converts a duration in ns to a duration in cycles.
- */
-static inline unsigned long long ns_2_cycles(unsigned long long ns)
-{
- struct cyc2ns_data data;
- unsigned long long cyc;
-
- cyc2ns_read_begin(&data);
- cyc = (ns << data.cyc2ns_shift) / data.cyc2ns_mul;
- cyc2ns_read_end();
-
- return cyc;
-}
-
-static inline unsigned long cycles_2_us(unsigned long long cyc)
-{
- return cycles_2_ns(cyc) / NSEC_PER_USEC;
-}
-
-static inline cycles_t sec_2_cycles(unsigned long sec)
-{
- return ns_2_cycles(sec * NSEC_PER_SEC);
-}
-
-static inline unsigned long long usec_2_cycles(unsigned long usec)
-{
- return ns_2_cycles(usec * NSEC_PER_USEC);
-}
-
-/*
- * wait for all cpus on this hub to finish their sends and go quiet
- * leaves uvhub_quiesce set so that no new broadcasts are started by
- * bau_flush_send_and_wait()
- */
-static inline void quiesce_local_uvhub(struct bau_control *hmaster)
-{
- atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
-}
-
-/*
- * mark this quiet-requestor as done
- */
-static inline void end_uvhub_quiesce(struct bau_control *hmaster)
-{
- atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
-}
-
-/*
- * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
- * But not currently used.
- */
-static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc)
-{
- return ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
-}
-
-/*
- * Entered when a bau descriptor has gone into a permanent busy wait because
- * of a hardware bug.
- * Workaround the bug.
- */
-static int handle_uv2_busy(struct bau_control *bcp)
-{
- struct ptc_stats *stat = bcp->statp;
-
- stat->s_uv2_wars++;
- bcp->busy = 1;
- return FLUSH_GIVEUP;
-}
-
-static int uv2_3_wait_completion(struct bau_desc *bau_desc,
- struct bau_control *bcp, long try)
-{
- unsigned long descriptor_stat;
- cycles_t ttm;
- u64 mmr_offset = bcp->status_mmr;
- int right_shift = bcp->status_index;
- int desc = bcp->uvhub_cpu;
- long busy_reps = 0;
- struct ptc_stats *stat = bcp->statp;
-
- descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
-
- /* spin on the status MMR, waiting for it to go idle */
- while (descriptor_stat != UV2H_DESC_IDLE) {
- if (descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) {
- /*
- * A h/w bug on the destination side may
- * have prevented the message being marked
- * pending, thus it doesn't get replied to
- * and gets continually nacked until it times
- * out with a SOURCE_TIMEOUT.
- */
- stat->s_stimeout++;
- return FLUSH_GIVEUP;
- } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
- ttm = get_cycles();
-
- /*
- * Our retries may be blocked by all destination
- * swack resources being consumed, and a timeout
- * pending. In that case hardware returns the
- * ERROR that looks like a destination timeout.
- * Without using the extended status we have to
- * deduce from the short time that this was a
- * strong nack.
- */
- if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
- bcp->conseccompletes = 0;
- stat->s_plugged++;
- /* FLUSH_RETRY_PLUGGED causes hang on boot */
- return FLUSH_GIVEUP;
- }
- stat->s_dtimeout++;
- bcp->conseccompletes = 0;
- /* FLUSH_RETRY_TIMEOUT causes hang on boot */
- return FLUSH_GIVEUP;
- } else {
- busy_reps++;
- if (busy_reps > 1000000) {
- /* not to hammer on the clock */
- busy_reps = 0;
- ttm = get_cycles();
- if ((ttm - bcp->send_message) > bcp->timeout_interval)
- return handle_uv2_busy(bcp);
- }
- /*
- * descriptor_stat is still BUSY
- */
- cpu_relax();
- }
- descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
- }
- bcp->conseccompletes++;
- return FLUSH_COMPLETE;
-}
-
-/*
- * Returns the status of current BAU message for cpu desc as a bit field
- * [Error][Busy][Aux]
- */
-static u64 read_status(u64 status_mmr, int index, int desc)
-{
- u64 stat;
-
- stat = ((read_lmmr(status_mmr) >> index) & UV_ACT_STATUS_MASK) << 1;
- stat |= (read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_2) >> desc) & 0x1;
-
- return stat;
-}
-
-static int uv4_wait_completion(struct bau_desc *bau_desc,
- struct bau_control *bcp, long try)
-{
- struct ptc_stats *stat = bcp->statp;
- u64 descriptor_stat;
- u64 mmr = bcp->status_mmr;
- int index = bcp->status_index;
- int desc = bcp->uvhub_cpu;
-
- descriptor_stat = read_status(mmr, index, desc);
-
- /* spin on the status MMR, waiting for it to go idle */
- while (descriptor_stat != UV2H_DESC_IDLE) {
- switch (descriptor_stat) {
- case UV2H_DESC_SOURCE_TIMEOUT:
- stat->s_stimeout++;
- return FLUSH_GIVEUP;
-
- case UV2H_DESC_DEST_TIMEOUT:
- stat->s_dtimeout++;
- bcp->conseccompletes = 0;
- return FLUSH_RETRY_TIMEOUT;
-
- case UV2H_DESC_DEST_STRONG_NACK:
- stat->s_plugged++;
- bcp->conseccompletes = 0;
- return FLUSH_RETRY_PLUGGED;
-
- case UV2H_DESC_DEST_PUT_ERR:
- bcp->conseccompletes = 0;
- return FLUSH_GIVEUP;
-
- default:
- /* descriptor_stat is still BUSY */
- cpu_relax();
- }
- descriptor_stat = read_status(mmr, index, desc);
- }
- bcp->conseccompletes++;
- return FLUSH_COMPLETE;
-}
-
-/*
- * Our retries are blocked by all destination sw ack resources being
- * in use, and a timeout is pending. In that case hardware immediately
- * returns the ERROR that looks like a destination timeout.
- */
-static void destination_plugged(struct bau_desc *bau_desc,
- struct bau_control *bcp,
- struct bau_control *hmaster, struct ptc_stats *stat)
-{
- udelay(bcp->plugged_delay);
- bcp->plugged_tries++;
-
- if (bcp->plugged_tries >= bcp->plugsb4reset) {
- bcp->plugged_tries = 0;
-
- quiesce_local_uvhub(hmaster);
-
- spin_lock(&hmaster->queue_lock);
- reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
-
- end_uvhub_quiesce(hmaster);
-
- bcp->ipi_attempts++;
- stat->s_resets_plug++;
- }
-}
-
-static void destination_timeout(struct bau_desc *bau_desc,
- struct bau_control *bcp, struct bau_control *hmaster,
- struct ptc_stats *stat)
-{
- hmaster->max_concurr = 1;
- bcp->timeout_tries++;
- if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
- bcp->timeout_tries = 0;
-
- quiesce_local_uvhub(hmaster);
-
- spin_lock(&hmaster->queue_lock);
- reset_with_ipi(&bau_desc->distribution, bcp);
- spin_unlock(&hmaster->queue_lock);
-
- end_uvhub_quiesce(hmaster);
-
- bcp->ipi_attempts++;
- stat->s_resets_timeout++;
- }
-}
-
-/*
- * Stop all cpus on a uvhub from using the BAU for a period of time.
- * This is reversed by check_enable.
- */
-static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
-{
- int tcpu;
- struct bau_control *tbcp;
- struct bau_control *hmaster;
- cycles_t tm1;
-
- hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
- if (!bcp->baudisabled) {
- stat->s_bau_disabled++;
- tm1 = get_cycles();
- for_each_present_cpu(tcpu) {
- tbcp = &per_cpu(bau_control, tcpu);
- if (tbcp->uvhub_master == hmaster) {
- tbcp->baudisabled = 1;
- tbcp->set_bau_on_time =
- tm1 + bcp->disabled_period;
- }
- }
- }
- spin_unlock(&hmaster->disable_lock);
-}
-
-static void count_max_concurr(int stat, struct bau_control *bcp,
- struct bau_control *hmaster)
-{
- bcp->plugged_tries = 0;
- bcp->timeout_tries = 0;
- if (stat != FLUSH_COMPLETE)
- return;
- if (bcp->conseccompletes <= bcp->complete_threshold)
- return;
- if (hmaster->max_concurr >= hmaster->max_concurr_const)
- return;
- hmaster->max_concurr++;
-}
-
-static void record_send_stats(cycles_t time1, cycles_t time2,
- struct bau_control *bcp, struct ptc_stats *stat,
- int completion_status, int try)
-{
- cycles_t elapsed;
-
- if (time2 > time1) {
- elapsed = time2 - time1;
- stat->s_time += elapsed;
-
- if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
- bcp->period_requests++;
- bcp->period_time += elapsed;
- if ((elapsed > usec_2_cycles(bcp->cong_response_us)) &&
- (bcp->period_requests > bcp->cong_reps) &&
- ((bcp->period_time / bcp->period_requests) >
- usec_2_cycles(bcp->cong_response_us))) {
- stat->s_congested++;
- disable_for_period(bcp, stat);
- }
- }
- } else
- stat->s_requestor--;
-
- if (completion_status == FLUSH_COMPLETE && try > 1)
- stat->s_retriesok++;
- else if (completion_status == FLUSH_GIVEUP) {
- stat->s_giveup++;
- if (get_cycles() > bcp->period_end)
- bcp->period_giveups = 0;
- bcp->period_giveups++;
- if (bcp->period_giveups == 1)
- bcp->period_end = get_cycles() + bcp->disabled_period;
- if (bcp->period_giveups > bcp->giveup_limit) {
- disable_for_period(bcp, stat);
- stat->s_giveuplimit++;
- }
- }
-}
-
-/*
- * Handle the completion status of a message send.
- */
-static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
- struct bau_control *bcp, struct bau_control *hmaster,
- struct ptc_stats *stat)
-{
- if (completion_status == FLUSH_RETRY_PLUGGED)
- destination_plugged(bau_desc, bcp, hmaster, stat);
- else if (completion_status == FLUSH_RETRY_TIMEOUT)
- destination_timeout(bau_desc, bcp, hmaster, stat);
-}
-
-/*
- * Send a broadcast and wait for it to complete.
- *
- * The flush_mask contains the cpus the broadcast is to be sent to including
- * cpus that are on the local uvhub.
- *
- * Returns 0 if all flushing represented in the mask was done.
- * Returns 1 if it gives up entirely and the original cpu mask is to be
- * returned to the kernel.
- */
-static int uv_flush_send_and_wait(struct cpumask *flush_mask,
- struct bau_control *bcp,
- struct bau_desc *bau_desc)
-{
- int seq_number = 0;
- int completion_stat = 0;
- long try = 0;
- unsigned long index;
- cycles_t time1;
- cycles_t time2;
- struct ptc_stats *stat = bcp->statp;
- struct bau_control *hmaster = bcp->uvhub_master;
- struct uv2_3_bau_msg_header *uv2_3_hdr = NULL;
-
- while (hmaster->uvhub_quiesce)
- cpu_relax();
-
- time1 = get_cycles();
- uv2_3_hdr = &bau_desc->header.uv2_3_hdr;
-
- do {
- if (try == 0) {
- uv2_3_hdr->msg_type = MSG_REGULAR;
- seq_number = bcp->message_number++;
- } else {
- uv2_3_hdr->msg_type = MSG_RETRY;
- stat->s_retry_messages++;
- }
-
- uv2_3_hdr->sequence = seq_number;
- index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
- bcp->send_message = get_cycles();
-
- write_mmr_activation(index);
-
- try++;
- completion_stat = ops.wait_completion(bau_desc, bcp, try);
-
- handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
-
- if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
- bcp->ipi_attempts = 0;
- stat->s_overipilimit++;
- completion_stat = FLUSH_GIVEUP;
- break;
- }
- cpu_relax();
- } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
- (completion_stat == FLUSH_RETRY_TIMEOUT));
-
- time2 = get_cycles();
-
- count_max_concurr(completion_stat, bcp, hmaster);
-
- while (hmaster->uvhub_quiesce)
- cpu_relax();
-
- atomic_dec(&hmaster->active_descriptor_count);
-
- record_send_stats(time1, time2, bcp, stat, completion_stat, try);
-
- if (completion_stat == FLUSH_GIVEUP)
- /* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
- return 1;
- return 0;
-}
-
-/*
- * The BAU is disabled for this uvhub. When the disabled time period has
- * expired re-enable it.
- * Return 0 if it is re-enabled for all cpus on this uvhub.
- */
-static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
-{
- int tcpu;
- struct bau_control *tbcp;
- struct bau_control *hmaster;
-
- hmaster = bcp->uvhub_master;
- spin_lock(&hmaster->disable_lock);
- if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
- stat->s_bau_reenabled++;
- for_each_present_cpu(tcpu) {
- tbcp = &per_cpu(bau_control, tcpu);
- if (tbcp->uvhub_master == hmaster) {
- tbcp->baudisabled = 0;
- tbcp->period_requests = 0;
- tbcp->period_time = 0;
- tbcp->period_giveups = 0;
- }
- }
- spin_unlock(&hmaster->disable_lock);
- return 0;
- }
- spin_unlock(&hmaster->disable_lock);
- return -1;
-}
-
-static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
- int remotes, struct bau_desc *bau_desc)
-{
- stat->s_requestor++;
- stat->s_ntargcpu += remotes + locals;
- stat->s_ntargremotes += remotes;
- stat->s_ntarglocals += locals;
-
- /* uvhub statistics */
- hubs = bau_uvhub_weight(&bau_desc->distribution);
- if (locals) {
- stat->s_ntarglocaluvhub++;
- stat->s_ntargremoteuvhub += (hubs - 1);
- } else
- stat->s_ntargremoteuvhub += hubs;
-
- stat->s_ntarguvhub += hubs;
-
- if (hubs >= 16)
- stat->s_ntarguvhub16++;
- else if (hubs >= 8)
- stat->s_ntarguvhub8++;
- else if (hubs >= 4)
- stat->s_ntarguvhub4++;
- else if (hubs >= 2)
- stat->s_ntarguvhub2++;
- else
- stat->s_ntarguvhub1++;
-}
-
-/*
- * Translate a cpu mask to the uvhub distribution mask in the BAU
- * activation descriptor.
- */
-static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
- struct bau_desc *bau_desc, int *localsp, int *remotesp)
-{
- int cpu;
- int pnode;
- int cnt = 0;
- struct hub_and_pnode *hpp;
-
- for_each_cpu(cpu, flush_mask) {
- /*
- * The distribution vector is a bit map of pnodes, relative
- * to the partition base pnode (and the partition base nasid
- * in the header).
- * Translate cpu to pnode and hub using a local memory array.
- */
- hpp = &bcp->socket_master->thp[cpu];
- pnode = hpp->pnode - bcp->partition_base_pnode;
- bau_uvhub_set(pnode, &bau_desc->distribution);
- cnt++;
- if (hpp->uvhub == bcp->uvhub)
- (*localsp)++;
- else
- (*remotesp)++;
- }
- if (!cnt)
- return 1;
- return 0;
-}
-
-/*
- * globally purge translation cache of a virtual address or all TLB's
- * @cpumask: mask of all cpu's in which the address is to be removed
- * @mm: mm_struct containing virtual address range
- * @start: start virtual address to be removed from TLB
- * @end: end virtual address to be remove from TLB
- * @cpu: the current cpu
- *
- * This is the entry point for initiating any UV global TLB shootdown.
- *
- * Purges the translation caches of all specified processors of the given
- * virtual address, or purges all TLB's on specified processors.
- *
- * The caller has derived the cpumask from the mm_struct. This function
- * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
- *
- * The cpumask is converted into a uvhubmask of the uvhubs containing
- * those cpus.
- *
- * Note that this function should be called with preemption disabled.
- *
- * Returns NULL if all remote flushing was done.
- * Returns pointer to cpumask if some remote flushing remains to be
- * done. The returned pointer is valid till preemption is re-enabled.
- */
-const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
- const struct flush_tlb_info *info)
-{
- unsigned int cpu = smp_processor_id();
- int locals = 0, remotes = 0, hubs = 0;
- struct bau_desc *bau_desc;
- struct cpumask *flush_mask;
- struct ptc_stats *stat;
- struct bau_control *bcp;
- unsigned long descriptor_status, status, address;
-
- bcp = &per_cpu(bau_control, cpu);
-
- if (bcp->nobau)
- return cpumask;
-
- stat = bcp->statp;
- stat->s_enters++;
-
- if (bcp->busy) {
- descriptor_status =
- read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
- status = ((descriptor_status >> (bcp->uvhub_cpu *
- UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
- if (status == UV2H_DESC_BUSY)
- return cpumask;
- bcp->busy = 0;
- }
-
- /* bau was disabled due to slow response */
- if (bcp->baudisabled) {
- if (check_enable(bcp, stat)) {
- stat->s_ipifordisabled++;
- return cpumask;
- }
- }
-
- /*
- * Each sending cpu has a per-cpu mask which it fills from the caller's
- * cpu mask. All cpus are converted to uvhubs and copied to the
- * activation descriptor.
- */
- flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
- /* don't actually do a shootdown of the local cpu */
- cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
-
- if (cpumask_test_cpu(cpu, cpumask))
- stat->s_ntargself++;
-
- bau_desc = bcp->descriptor_base;
- bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
- bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
- if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
- return NULL;
-
- record_send_statistics(stat, locals, hubs, remotes, bau_desc);
-
- if (!info->end || (info->end - info->start) <= PAGE_SIZE)
- address = info->start;
- else
- address = TLB_FLUSH_ALL;
-
- switch (bcp->uvhub_version) {
- case UV_BAU_V2:
- case UV_BAU_V3:
- bau_desc->payload.uv2_3.address = address;
- bau_desc->payload.uv2_3.sending_cpu = cpu;
- break;
- case UV_BAU_V4:
- bau_desc->payload.uv4.address = address;
- bau_desc->payload.uv4.sending_cpu = cpu;
- bau_desc->payload.uv4.qualifier = BAU_DESC_QUALIFIER;
- break;
- }
-
- /*
- * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
- * or 1 if it gave up and the original cpumask should be returned.
- */
- if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
- return NULL;
- else
- return cpumask;
-}
-
-/*
- * Search the message queue for any 'other' unprocessed message with the
- * same software acknowledge resource bit vector as the 'msg' message.
- */
-static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
- struct bau_control *bcp)
-{
- struct bau_pq_entry *msg_next = msg + 1;
- unsigned char swack_vec = msg->swack_vec;
-
- if (msg_next > bcp->queue_last)
- msg_next = bcp->queue_first;
- while (msg_next != msg) {
- if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
- (msg_next->swack_vec == swack_vec))
- return msg_next;
- msg_next++;
- if (msg_next > bcp->queue_last)
- msg_next = bcp->queue_first;
- }
- return NULL;
-}
-
-/*
- * UV2 needs to work around a bug in which an arriving message has not
- * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
- * Such a message must be ignored.
- */
-static void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
-{
- unsigned long mmr_image;
- unsigned char swack_vec;
- struct bau_pq_entry *msg = mdp->msg;
- struct bau_pq_entry *other_msg;
-
- mmr_image = ops.read_l_sw_ack();
- swack_vec = msg->swack_vec;
-
- if ((swack_vec & mmr_image) == 0) {
- /*
- * This message was assigned a swack resource, but no
- * reserved acknowlegment is pending.
- * The bug has prevented this message from setting the MMR.
- */
- /*
- * Some message has set the MMR 'pending' bit; it might have
- * been another message. Look for that message.
- */
- other_msg = find_another_by_swack(msg, bcp);
- if (other_msg) {
- /*
- * There is another. Process this one but do not
- * ack it.
- */
- bau_process_message(mdp, bcp, 0);
- /*
- * Let the natural processing of that other message
- * acknowledge it. Don't get the processing of sw_ack's
- * out of order.
- */
- return;
- }
- }
-
- /*
- * Either the MMR shows this one pending a reply or there is no
- * other message using this sw_ack, so it is safe to acknowledge it.
- */
- bau_process_message(mdp, bcp, 1);
-
- return;
-}
-
-/*
- * The BAU message interrupt comes here. (registered by set_intr_gate)
- * See entry_64.S
- *
- * We received a broadcast assist message.
- *
- * Interrupts are disabled; this interrupt could represent
- * the receipt of several messages.
- *
- * All cores/threads on this hub get this interrupt.
- * The last one to see it does the software ack.
- * (the resource will not be freed until noninterruptable cpus see this
- * interrupt; hardware may timeout the s/w ack and reply ERROR)
- */
-DEFINE_IDTENTRY_SYSVEC(sysvec_uv_bau_message)
-{
- int count = 0;
- cycles_t time_start;
- struct bau_pq_entry *msg;
- struct bau_control *bcp;
- struct ptc_stats *stat;
- struct msg_desc msgdesc;
-
- ack_APIC_irq();
- kvm_set_cpu_l1tf_flush_l1d();
- time_start = get_cycles();
-
- bcp = &per_cpu(bau_control, smp_processor_id());
- stat = bcp->statp;
-
- msgdesc.queue_first = bcp->queue_first;
- msgdesc.queue_last = bcp->queue_last;
-
- msg = bcp->bau_msg_head;
- while (msg->swack_vec) {
- count++;
-
- msgdesc.msg_slot = msg - msgdesc.queue_first;
- msgdesc.msg = msg;
- if (bcp->uvhub_version == UV_BAU_V2)
- process_uv2_message(&msgdesc, bcp);
- else
- /* no error workaround for uv3 */
- bau_process_message(&msgdesc, bcp, 1);
-
- msg++;
- if (msg > msgdesc.queue_last)
- msg = msgdesc.queue_first;
- bcp->bau_msg_head = msg;
- }
- stat->d_time += (get_cycles() - time_start);
- if (!count)
- stat->d_nomsg++;
- else if (count > 1)
- stat->d_multmsg++;
-}
-
-/*
- * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
- * shootdown message timeouts enabled. The timeout does not cause
- * an interrupt, but causes an error message to be returned to
- * the sender.
- */
-static void __init enable_timeouts(void)
-{
- int uvhub;
- int nuvhubs;
- int pnode;
- unsigned long mmr_image;
-
- nuvhubs = uv_num_possible_blades();
-
- for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
- if (!uv_blade_nr_possible_cpus(uvhub))
- continue;
-
- pnode = uv_blade_to_pnode(uvhub);
- mmr_image = read_mmr_misc_control(pnode);
- /*
- * Set the timeout period and then lock it in, in three
- * steps; captures and locks in the period.
- *
- * To program the period, the SOFT_ACK_MODE must be off.
- */
- mmr_image &= ~(1L << SOFTACK_MSHIFT);
- write_mmr_misc_control(pnode, mmr_image);
- /*
- * Set the 4-bit period.
- */
- mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
- mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
- write_mmr_misc_control(pnode, mmr_image);
-
- mmr_image |= (1L << SOFTACK_MSHIFT);
- if (is_uv2_hub()) {
- /* do not touch the legacy mode bit */
- /* hw bug workaround; do not use extended status */
- mmr_image &= ~(1L << UV2_EXT_SHFT);
- } else if (is_uv3_hub()) {
- mmr_image &= ~(1L << PREFETCH_HINT_SHFT);
- mmr_image |= (1L << SB_STATUS_SHFT);
- }
- write_mmr_misc_control(pnode, mmr_image);
- }
-}
-
-static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
-{
- if (*offset < num_possible_cpus())
- return offset;
- return NULL;
-}
-
-static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
-{
- (*offset)++;
- if (*offset < num_possible_cpus())
- return offset;
- return NULL;
-}
-
-static void ptc_seq_stop(struct seq_file *file, void *data)
-{
-}
-
-/*
- * Display the statistics thru /proc/sgi_uv/ptc_statistics
- * 'data' points to the cpu number
- * Note: see the descriptions in stat_description[].
- */
-static int ptc_seq_show(struct seq_file *file, void *data)
-{
- struct ptc_stats *stat;
- struct bau_control *bcp;
- int cpu;
-
- cpu = *(loff_t *)data;
- if (!cpu) {
- seq_puts(file,
- "# cpu bauoff sent stime self locals remotes ncpus localhub ");
- seq_puts(file, "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
- seq_puts(file,
- "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
- seq_puts(file,
- "rok resetp resett giveup sto bz throt disable ");
- seq_puts(file,
- "enable wars warshw warwaits enters ipidis plugged ");
- seq_puts(file,
- "ipiover glim cong swack recv rtime all one mult ");
- seq_puts(file, "none retry canc nocan reset rcan\n");
- }
- if (cpu < num_possible_cpus() && cpu_online(cpu)) {
- bcp = &per_cpu(bau_control, cpu);
- if (bcp->nobau) {
- seq_printf(file, "cpu %d bau disabled\n", cpu);
- return 0;
- }
- stat = bcp->statp;
- /* source side statistics */
- seq_printf(file,
- "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
- cpu, bcp->nobau, stat->s_requestor,
- cycles_2_us(stat->s_time),
- stat->s_ntargself, stat->s_ntarglocals,
- stat->s_ntargremotes, stat->s_ntargcpu,
- stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
- stat->s_ntarguvhub, stat->s_ntarguvhub16);
- seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
- stat->s_ntarguvhub8, stat->s_ntarguvhub4,
- stat->s_ntarguvhub2, stat->s_ntarguvhub1,
- stat->s_dtimeout, stat->s_strongnacks);
- seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
- stat->s_retry_messages, stat->s_retriesok,
- stat->s_resets_plug, stat->s_resets_timeout,
- stat->s_giveup, stat->s_stimeout,
- stat->s_busy, stat->s_throttles);
- seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
- stat->s_bau_disabled, stat->s_bau_reenabled,
- stat->s_uv2_wars, stat->s_uv2_wars_hw,
- stat->s_uv2_war_waits, stat->s_enters,
- stat->s_ipifordisabled, stat->s_plugged,
- stat->s_overipilimit, stat->s_giveuplimit,
- stat->s_congested);
-
- /* destination side statistics */
- seq_printf(file,
- "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
- ops.read_g_sw_ack(uv_cpu_to_pnode(cpu)),
- stat->d_requestee, cycles_2_us(stat->d_time),
- stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
- stat->d_nomsg, stat->d_retries, stat->d_canceled,
- stat->d_nocanceled, stat->d_resets,
- stat->d_rcanceled);
- }
- return 0;
-}
-
-/*
- * Display the tunables thru debugfs
- */
-static ssize_t tunables_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- char *buf;
- int ret;
-
- buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
- "max_concur plugged_delay plugsb4reset timeoutsb4reset",
- "ipi_reset_limit complete_threshold congested_response_us",
- "congested_reps disabled_period giveup_limit",
- max_concurr, plugged_delay, plugsb4reset,
- timeoutsb4reset, ipi_reset_limit, complete_threshold,
- congested_respns_us, congested_reps, disabled_period,
- giveup_limit);
-
- if (!buf)
- return -ENOMEM;
-
- ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
- kfree(buf);
- return ret;
-}
-
-/*
- * handle a write to /proc/sgi_uv/ptc_statistics
- * -1: reset the statistics
- * 0: display meaning of the statistics
- */
-static ssize_t ptc_proc_write(struct file *file, const char __user *user,
- size_t count, loff_t *data)
-{
- int cpu;
- int i;
- int elements;
- long input_arg;
- char optstr[64];
- struct ptc_stats *stat;
-
- if (count == 0 || count > sizeof(optstr))
- return -EINVAL;
- if (copy_from_user(optstr, user, count))
- return -EFAULT;
- optstr[count - 1] = '\0';
-
- if (!strcmp(optstr, "on")) {
- set_bau_on();
- return count;
- } else if (!strcmp(optstr, "off")) {
- set_bau_off();
- return count;
- }
-
- if (kstrtol(optstr, 10, &input_arg) < 0) {
- pr_debug("%s is invalid\n", optstr);
- return -EINVAL;
- }
-
- if (input_arg == 0) {
- elements = ARRAY_SIZE(stat_description);
- pr_debug("# cpu: cpu number\n");
- pr_debug("Sender statistics:\n");
- for (i = 0; i < elements; i++)
- pr_debug("%s\n", stat_description[i]);
- } else if (input_arg == -1) {
- for_each_present_cpu(cpu) {
- stat = &per_cpu(ptcstats, cpu);
- memset(stat, 0, sizeof(struct ptc_stats));
- }
- }
-
- return count;
-}
-
-static int local_atoi(const char *name)
-{
- int val = 0;
-
- for (;; name++) {
- switch (*name) {
- case '0' ... '9':
- val = 10*val+(*name-'0');
- break;
- default:
- return val;
- }
- }
-}
-
-/*
- * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables.
- * Zero values reset them to defaults.
- */
-static int parse_tunables_write(struct bau_control *bcp, char *instr,
- int count)
-{
- char *p;
- char *q;
- int cnt = 0;
- int val;
- int e = ARRAY_SIZE(tunables);
-
- p = instr + strspn(instr, WHITESPACE);
- q = p;
- for (; *p; p = q + strspn(q, WHITESPACE)) {
- q = p + strcspn(p, WHITESPACE);
- cnt++;
- if (q == p)
- break;
- }
- if (cnt != e) {
- pr_info("bau tunable error: should be %d values\n", e);
- return -EINVAL;
- }
-
- p = instr + strspn(instr, WHITESPACE);
- q = p;
- for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
- q = p + strcspn(p, WHITESPACE);
- val = local_atoi(p);
- switch (cnt) {
- case 0:
- if (val == 0) {
- max_concurr = MAX_BAU_CONCURRENT;
- max_concurr_const = MAX_BAU_CONCURRENT;
- continue;
- }
- if (val < 1 || val > bcp->cpus_in_uvhub) {
- pr_debug(
- "Error: BAU max concurrent %d is invalid\n",
- val);
- return -EINVAL;
- }
- max_concurr = val;
- max_concurr_const = val;
- continue;
- default:
- if (val == 0)
- *tunables[cnt].tunp = tunables[cnt].deflt;
- else
- *tunables[cnt].tunp = val;
- continue;
- }
- }
- return 0;
-}
-
-/*
- * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables)
- */
-static ssize_t tunables_write(struct file *file, const char __user *user,
- size_t count, loff_t *data)
-{
- int cpu;
- int ret;
- char instr[100];
- struct bau_control *bcp;
-
- if (count == 0 || count > sizeof(instr)-1)
- return -EINVAL;
- if (copy_from_user(instr, user, count))
- return -EFAULT;
-
- instr[count] = '\0';
-
- cpu = get_cpu();
- bcp = &per_cpu(bau_control, cpu);
- ret = parse_tunables_write(bcp, instr, count);
- put_cpu();
- if (ret)
- return ret;
-
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->max_concurr = max_concurr;
- bcp->max_concurr_const = max_concurr;
- bcp->plugged_delay = plugged_delay;
- bcp->plugsb4reset = plugsb4reset;
- bcp->timeoutsb4reset = timeoutsb4reset;
- bcp->ipi_reset_limit = ipi_reset_limit;
- bcp->complete_threshold = complete_threshold;
- bcp->cong_response_us = congested_respns_us;
- bcp->cong_reps = congested_reps;
- bcp->disabled_period = sec_2_cycles(disabled_period);
- bcp->giveup_limit = giveup_limit;
- }
- return count;
-}
-
-static const struct seq_operations uv_ptc_seq_ops = {
- .start = ptc_seq_start,
- .next = ptc_seq_next,
- .stop = ptc_seq_stop,
- .show = ptc_seq_show
-};
-
-static int ptc_proc_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &uv_ptc_seq_ops);
-}
-
-static int tunables_open(struct inode *inode, struct file *file)
-{
- return 0;
-}
-
-static const struct proc_ops uv_ptc_proc_ops = {
- .proc_open = ptc_proc_open,
- .proc_read = seq_read,
- .proc_write = ptc_proc_write,
- .proc_lseek = seq_lseek,
- .proc_release = seq_release,
-};
-
-static const struct file_operations tunables_fops = {
- .open = tunables_open,
- .read = tunables_read,
- .write = tunables_write,
- .llseek = default_llseek,
-};
-
-static int __init uv_ptc_init(void)
-{
- struct proc_dir_entry *proc_uv_ptc;
-
- if (!is_uv_system())
- return 0;
-
- proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
- &uv_ptc_proc_ops);
- if (!proc_uv_ptc) {
- pr_err("unable to create %s proc entry\n",
- UV_PTC_BASENAME);
- return -EINVAL;
- }
-
- tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
- debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, tunables_dir, NULL,
- &tunables_fops);
- return 0;
-}
-
-/*
- * Initialize the sending side's sending buffers.
- */
-static void activation_descriptor_init(int node, int pnode, int base_pnode)
-{
- int i;
- int cpu;
- unsigned long gpa;
- unsigned long m;
- unsigned long n;
- size_t dsize;
- struct bau_desc *bau_desc;
- struct bau_desc *bd2;
- struct uv2_3_bau_msg_header *uv2_3_hdr;
- struct bau_control *bcp;
-
- /*
- * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
- * per cpu; and one per cpu on the uvhub (ADP_SZ)
- */
- dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
- bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
- BUG_ON(!bau_desc);
-
- gpa = uv_gpa(bau_desc);
- n = uv_gpa_to_gnode(gpa);
- m = ops.bau_gpa_to_offset(gpa);
-
- /* the 14-bit pnode */
- write_mmr_descriptor_base(pnode,
- (n << UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT | m));
- /*
- * Initializing all 8 (ITEMS_PER_DESC) descriptors for each
- * cpu even though we only use the first one; one descriptor can
- * describe a broadcast to 256 uv hubs.
- */
- for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
- memset(bd2, 0, sizeof(struct bau_desc));
- /*
- * BIOS uses legacy mode, but uv2 and uv3 hardware always
- * uses native mode for selective broadcasts.
- */
- uv2_3_hdr = &bd2->header.uv2_3_hdr;
- uv2_3_hdr->swack_flag = 1;
- uv2_3_hdr->base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
- uv2_3_hdr->dest_subnodeid = UV_LB_SUBNODEID;
- uv2_3_hdr->command = UV_NET_ENDPOINT_INTD;
- }
- for_each_present_cpu(cpu) {
- if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
- continue;
- bcp = &per_cpu(bau_control, cpu);
- bcp->descriptor_base = bau_desc;
- }
-}
-
-/*
- * initialize the destination side's receiving buffers
- * entered for each uvhub in the partition
- * - node is first node (kernel memory notion) on the uvhub
- * - pnode is the uvhub's physical identifier
- */
-static void pq_init(int node, int pnode)
-{
- int cpu;
- size_t plsize;
- char *cp;
- void *vp;
- unsigned long gnode, first, last, tail;
- struct bau_pq_entry *pqp;
- struct bau_control *bcp;
-
- plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
- vp = kmalloc_node(plsize, GFP_KERNEL, node);
- BUG_ON(!vp);
-
- pqp = (struct bau_pq_entry *)vp;
- cp = (char *)pqp + 31;
- pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
-
- for_each_present_cpu(cpu) {
- if (pnode != uv_cpu_to_pnode(cpu))
- continue;
- /* for every cpu on this pnode: */
- bcp = &per_cpu(bau_control, cpu);
- bcp->queue_first = pqp;
- bcp->bau_msg_head = pqp;
- bcp->queue_last = pqp + (DEST_Q_SIZE - 1);
- }
-
- first = ops.bau_gpa_to_offset(uv_gpa(pqp));
- last = ops.bau_gpa_to_offset(uv_gpa(pqp + (DEST_Q_SIZE - 1)));
-
- /*
- * Pre UV4, the gnode is required to locate the payload queue
- * and the payload queue tail must be maintained by the kernel.
- */
- bcp = &per_cpu(bau_control, smp_processor_id());
- if (bcp->uvhub_version <= UV_BAU_V3) {
- tail = first;
- gnode = uv_gpa_to_gnode(uv_gpa(pqp));
- first = (gnode << UV_PAYLOADQ_GNODE_SHIFT) | tail;
- write_mmr_payload_tail(pnode, tail);
- }
-
- ops.write_payload_first(pnode, first);
- ops.write_payload_last(pnode, last);
-
- /* in effect, all msg_type's are set to MSG_NOOP */
- memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
-}
-
-/*
- * Initialization of each UV hub's structures
- */
-static void __init init_uvhub(int uvhub, int vector, int base_pnode)
-{
- int node;
- int pnode;
- unsigned long apicid;
-
- node = uvhub_to_first_node(uvhub);
- pnode = uv_blade_to_pnode(uvhub);
-
- activation_descriptor_init(node, pnode, base_pnode);
-
- pq_init(node, pnode);
- /*
- * The below initialization can't be in firmware because the
- * messaging IRQ will be determined by the OS.
- */
- apicid = uvhub_to_first_apicid(uvhub);
- write_mmr_data_config(pnode, ((apicid << 32) | vector));
-}
-
-/*
- * We will set BAU_MISC_CONTROL with a timeout period.
- * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
- * So the destination timeout period has to be calculated from them.
- */
-static int calculate_destination_timeout(void)
-{
- unsigned long mmr_image;
- int mult1;
- int base;
- int ret;
-
- /* same destination timeout for uv2 and uv3 */
- /* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
- mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
- mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
- if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
- base = 80;
- else
- base = 10;
- mult1 = mmr_image & UV2_ACK_MASK;
- ret = mult1 * base;
-
- return ret;
-}
-
-static void __init init_per_cpu_tunables(void)
-{
- int cpu;
- struct bau_control *bcp;
-
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
- bcp->baudisabled = 0;
- if (nobau)
- bcp->nobau = true;
- bcp->statp = &per_cpu(ptcstats, cpu);
- /* time interval to catch a hardware stay-busy bug */
- bcp->timeout_interval = usec_2_cycles(2*timeout_us);
- bcp->max_concurr = max_concurr;
- bcp->max_concurr_const = max_concurr;
- bcp->plugged_delay = plugged_delay;
- bcp->plugsb4reset = plugsb4reset;
- bcp->timeoutsb4reset = timeoutsb4reset;
- bcp->ipi_reset_limit = ipi_reset_limit;
- bcp->complete_threshold = complete_threshold;
- bcp->cong_response_us = congested_respns_us;
- bcp->cong_reps = congested_reps;
- bcp->disabled_period = sec_2_cycles(disabled_period);
- bcp->giveup_limit = giveup_limit;
- spin_lock_init(&bcp->queue_lock);
- spin_lock_init(&bcp->uvhub_lock);
- spin_lock_init(&bcp->disable_lock);
- }
-}
-
-/*
- * Scan all cpus to collect blade and socket summaries.
- */
-static int __init get_cpu_topology(int base_pnode,
- struct uvhub_desc *uvhub_descs,
- unsigned char *uvhub_mask)
-{
- int cpu;
- int pnode;
- int uvhub;
- int socket;
- struct bau_control *bcp;
- struct uvhub_desc *bdp;
- struct socket_desc *sdp;
-
- for_each_present_cpu(cpu) {
- bcp = &per_cpu(bau_control, cpu);
-
- memset(bcp, 0, sizeof(struct bau_control));
-
- pnode = uv_cpu_hub_info(cpu)->pnode;
- if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
- pr_emerg(
- "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
- cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
- return 1;
- }
-
- bcp->osnode = cpu_to_node(cpu);
- bcp->partition_base_pnode = base_pnode;
-
- uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
- *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
- bdp = &uvhub_descs[uvhub];
-
- bdp->num_cpus++;
- bdp->uvhub = uvhub;
- bdp->pnode = pnode;
-
- /* kludge: 'assuming' one node per socket, and assuming that
- disabling a socket just leaves a gap in node numbers */
- socket = bcp->osnode & 1;
- bdp->socket_mask |= (1 << socket);
- sdp = &bdp->socket[socket];
- sdp->cpu_number[sdp->num_cpus] = cpu;
- sdp->num_cpus++;
- if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
- pr_emerg("%d cpus per socket invalid\n",
- sdp->num_cpus);
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * Each socket is to get a local array of pnodes/hubs.
- */
-static void make_per_cpu_thp(struct bau_control *smaster)
-{
- int cpu;
- size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
-
- smaster->thp = kzalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
- for_each_present_cpu(cpu) {
- smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
- smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
- }
-}
-
-/*
- * Each uvhub is to get a local cpumask.
- */
-static void make_per_hub_cpumask(struct bau_control *hmaster)
-{
- int sz = sizeof(cpumask_t);
-
- hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode);
-}
-
-/*
- * Initialize all the per_cpu information for the cpu's on a given socket,
- * given what has been gathered into the socket_desc struct.
- * And reports the chosen hub and socket masters back to the caller.
- */
-static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
- struct bau_control **smasterp,
- struct bau_control **hmasterp)
-{
- int i, cpu, uvhub_cpu;
- struct bau_control *bcp;
-
- for (i = 0; i < sdp->num_cpus; i++) {
- cpu = sdp->cpu_number[i];
- bcp = &per_cpu(bau_control, cpu);
- bcp->cpu = cpu;
- if (i == 0) {
- *smasterp = bcp;
- if (!(*hmasterp))
- *hmasterp = bcp;
- }
- bcp->cpus_in_uvhub = bdp->num_cpus;
- bcp->cpus_in_socket = sdp->num_cpus;
- bcp->socket_master = *smasterp;
- bcp->uvhub = bdp->uvhub;
- if (is_uv2_hub())
- bcp->uvhub_version = UV_BAU_V2;
- else if (is_uv3_hub())
- bcp->uvhub_version = UV_BAU_V3;
- else if (is_uv4_hub())
- bcp->uvhub_version = UV_BAU_V4;
- else {
- pr_emerg("uvhub version not 1, 2, 3, or 4\n");
- return 1;
- }
- bcp->uvhub_master = *hmasterp;
- uvhub_cpu = uv_cpu_blade_processor_id(cpu);
- bcp->uvhub_cpu = uvhub_cpu;
-
- /*
- * The ERROR and BUSY status registers are located pairwise over
- * the STATUS_0 and STATUS_1 mmrs; each an array[32] of 2 bits.
- */
- if (uvhub_cpu < UV_CPUS_PER_AS) {
- bcp->status_mmr = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
- bcp->status_index = uvhub_cpu * UV_ACT_STATUS_SIZE;
- } else {
- bcp->status_mmr = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
- bcp->status_index = (uvhub_cpu - UV_CPUS_PER_AS)
- * UV_ACT_STATUS_SIZE;
- }
-
- if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
- pr_emerg("%d cpus per uvhub invalid\n",
- bcp->uvhub_cpu);
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * Summarize the blade and socket topology into the per_cpu structures.
- */
-static int __init summarize_uvhub_sockets(int nuvhubs,
- struct uvhub_desc *uvhub_descs,
- unsigned char *uvhub_mask)
-{
- int socket;
- int uvhub;
- unsigned short socket_mask;
-
- for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
- struct uvhub_desc *bdp;
- struct bau_control *smaster = NULL;
- struct bau_control *hmaster = NULL;
-
- if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
- continue;
-
- bdp = &uvhub_descs[uvhub];
- socket_mask = bdp->socket_mask;
- socket = 0;
- while (socket_mask) {
- struct socket_desc *sdp;
- if ((socket_mask & 1)) {
- sdp = &bdp->socket[socket];
- if (scan_sock(sdp, bdp, &smaster, &hmaster))
- return 1;
- make_per_cpu_thp(smaster);
- }
- socket++;
- socket_mask = (socket_mask >> 1);
- }
- make_per_hub_cpumask(hmaster);
- }
- return 0;
-}
-
-/*
- * initialize the bau_control structure for each cpu
- */
-static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
-{
- struct uvhub_desc *uvhub_descs;
- unsigned char *uvhub_mask = NULL;
-
- if (is_uv3_hub() || is_uv2_hub())
- timeout_us = calculate_destination_timeout();
-
- uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
- if (!uvhub_descs)
- goto fail;
-
- uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
- if (!uvhub_mask)
- goto fail;
-
- if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
- goto fail;
-
- if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
- goto fail;
-
- kfree(uvhub_descs);
- kfree(uvhub_mask);
- init_per_cpu_tunables();
- return 0;
-
-fail:
- kfree(uvhub_descs);
- kfree(uvhub_mask);
- return 1;
-}
-
-static const struct bau_operations uv2_3_bau_ops __initconst = {
- .bau_gpa_to_offset = uv_gpa_to_offset,
- .read_l_sw_ack = read_mmr_sw_ack,
- .read_g_sw_ack = read_gmmr_sw_ack,
- .write_l_sw_ack = write_mmr_sw_ack,
- .write_g_sw_ack = write_gmmr_sw_ack,
- .write_payload_first = write_mmr_payload_first,
- .write_payload_last = write_mmr_payload_last,
- .wait_completion = uv2_3_wait_completion,
-};
-
-static const struct bau_operations uv4_bau_ops __initconst = {
- .bau_gpa_to_offset = uv_gpa_to_soc_phys_ram,
- .read_l_sw_ack = read_mmr_proc_sw_ack,
- .read_g_sw_ack = read_gmmr_proc_sw_ack,
- .write_l_sw_ack = write_mmr_proc_sw_ack,
- .write_g_sw_ack = write_gmmr_proc_sw_ack,
- .write_payload_first = write_mmr_proc_payload_first,
- .write_payload_last = write_mmr_proc_payload_last,
- .wait_completion = uv4_wait_completion,
-};
-
-/*
- * Initialization of BAU-related structures
- */
-static int __init uv_bau_init(void)
-{
- int uvhub;
- int pnode;
- int nuvhubs;
- int cur_cpu;
- int cpus;
- int vector;
- cpumask_var_t *mask;
-
- if (!is_uv_system())
- return 0;
-
- if (is_uv4_hub())
- ops = uv4_bau_ops;
- else if (is_uv3_hub())
- ops = uv2_3_bau_ops;
- else if (is_uv2_hub())
- ops = uv2_3_bau_ops;
-
- nuvhubs = uv_num_possible_blades();
- if (nuvhubs < 2) {
- pr_crit("UV: BAU disabled - insufficient hub count\n");
- goto err_bau_disable;
- }
-
- for_each_possible_cpu(cur_cpu) {
- mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
- zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
- }
-
- uv_base_pnode = 0x7fffffff;
- for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
- cpus = uv_blade_nr_possible_cpus(uvhub);
- if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
- uv_base_pnode = uv_blade_to_pnode(uvhub);
- }
-
- /* software timeouts are not supported on UV4 */
- if (is_uv3_hub() || is_uv2_hub())
- enable_timeouts();
-
- if (init_per_cpu(nuvhubs, uv_base_pnode)) {
- pr_crit("UV: BAU disabled - per CPU init failed\n");
- goto err_bau_disable;
- }
-
- vector = UV_BAU_MESSAGE;
- for_each_possible_blade(uvhub) {
- if (uv_blade_nr_possible_cpus(uvhub))
- init_uvhub(uvhub, vector, uv_base_pnode);
- }
-
- for_each_possible_blade(uvhub) {
- if (uv_blade_nr_possible_cpus(uvhub)) {
- unsigned long val;
- unsigned long mmr;
- pnode = uv_blade_to_pnode(uvhub);
- /* INIT the bau */
- val = 1L << 63;
- write_gmmr_activation(pnode, val);
- mmr = 1; /* should be 1 to broadcast to both sockets */
- write_mmr_data_broadcast(pnode, mmr);
- }
- }
-
- return 0;
-
-err_bau_disable:
-
- for_each_possible_cpu(cur_cpu)
- free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));
-
- set_bau_off();
- nobau_perm = 1;
-
- return -EINVAL;
-}
-core_initcall(uv_bau_init);
-fs_initcall(uv_ptc_init);
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index abb6075397f0..18ca2261cc9a 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -90,15 +90,15 @@ static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq,
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret >= 0) {
- if (info->uv_limit == UV_AFFINITY_CPU)
+ if (info->uv.limit == UV_AFFINITY_CPU)
irq_set_status_flags(virq, IRQ_NO_BALANCING);
else
irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
- chip_data->pnode = uv_blade_to_pnode(info->uv_blade);
- chip_data->offset = info->uv_offset;
+ chip_data->pnode = uv_blade_to_pnode(info->uv.blade);
+ chip_data->offset = info->uv.offset;
irq_domain_set_info(domain, virq, virq, &uv_irq_chip, chip_data,
- handle_percpu_irq, NULL, info->uv_name);
+ handle_percpu_irq, NULL, info->uv.name);
} else {
kfree(chip_data);
}
@@ -193,10 +193,10 @@ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
init_irq_alloc_info(&info, cpumask_of(cpu));
info.type = X86_IRQ_ALLOC_TYPE_UV;
- info.uv_limit = limit;
- info.uv_blade = mmr_blade;
- info.uv_offset = mmr_offset;
- info.uv_name = irq_name;
+ info.uv.limit = limit;
+ info.uv.blade = mmr_blade;
+ info.uv.offset = mmr_offset;
+ info.uv.name = irq_name;
return irq_domain_alloc_irqs(domain, 1,
uv_blade_to_memory_nid(mmr_blade), &info);
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 9d08ff5a755e..eafc530c8767 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -2,8 +2,9 @@
/*
* SGI NMI support routines
*
- * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Mike Travis
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Mike Travis
*/
#include <linux/cpu.h>
@@ -54,6 +55,19 @@ static struct uv_hub_nmi_s **uv_hub_nmi_list;
DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
+/* Newer SMM NMI handler, not present in all systems */
+static unsigned long uvh_nmi_mmrx; /* UVH_EVENT_OCCURRED0/1 */
+static unsigned long uvh_nmi_mmrx_clear; /* UVH_EVENT_OCCURRED0/1_ALIAS */
+static int uvh_nmi_mmrx_shift; /* UVH_EVENT_OCCURRED0/1_EXTIO_INT0_SHFT */
+static char *uvh_nmi_mmrx_type; /* "EXTIO_INT0" */
+
+/* Non-zero indicates newer SMM NMI handler present */
+static unsigned long uvh_nmi_mmrx_supported; /* UVH_EXTIO_INT0_BROADCAST */
+
+/* Indicates to BIOS that we want to use the newer SMM NMI handler */
+static unsigned long uvh_nmi_mmrx_req; /* UVH_BIOS_KERNEL_MMR_ALIAS_2 */
+static int uvh_nmi_mmrx_req_shift; /* 62 */
+
/* UV hubless values */
#define NMI_CONTROL_PORT 0x70
#define NMI_DUMMY_PORT 0x71
@@ -227,13 +241,41 @@ static inline bool uv_nmi_action_is(const char *action)
/* Setup which NMI support is present in system */
static void uv_nmi_setup_mmrs(void)
{
- if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) {
- uv_write_local_mmr(UVH_NMI_MMRX_REQ,
- 1UL << UVH_NMI_MMRX_REQ_SHIFT);
- nmi_mmr = UVH_NMI_MMRX;
- nmi_mmr_clear = UVH_NMI_MMRX_CLEAR;
- nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT;
- pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE);
+ /* First determine arch specific MMRs to handshake with BIOS */
+ if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {
+ uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
+ uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
+ uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
+ uvh_nmi_mmrx_type = "OCRD0-EXTIO_INT0";
+
+ uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
+ uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
+ uvh_nmi_mmrx_req_shift = 62;
+
+ } else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) {
+ uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
+ uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
+ uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
+ uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
+
+ uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
+ uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
+ uvh_nmi_mmrx_req_shift = 62;
+
+ } else {
+ pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n",
+ __func__);
+ return;
+ }
+
+ /* Then find out if new NMI is supported */
+ if (likely(uv_read_local_mmr(uvh_nmi_mmrx_supported))) {
+ uv_write_local_mmr(uvh_nmi_mmrx_req,
+ 1UL << uvh_nmi_mmrx_req_shift);
+ nmi_mmr = uvh_nmi_mmrx;
+ nmi_mmr_clear = uvh_nmi_mmrx_clear;
+ nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift;
+ pr_info("UV: SMI NMI support: %s\n", uvh_nmi_mmrx_type);
} else {
nmi_mmr = UVH_NMI_MMR;
nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
@@ -1049,5 +1091,5 @@ void __init uv_nmi_setup_hubless(void)
/* Ensure NMI enabled in Processor Interface Reg: */
uv_reassert_nmi();
uv_register_nmi_notifier();
- pr_info("UV: Hubless NMI enabled\n");
+ pr_info("UV: PCH NMI enabled\n");
}
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index f82a1337a608..54663f3e00cb 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -2,6 +2,7 @@
/*
* SGI RTC clock/timer routines.
*
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) Dimitri Sivanich
*/
@@ -52,7 +53,7 @@ struct uv_rtc_timer_head {
struct {
int lcpu; /* systemwide logical cpu number */
u64 expires; /* next timer expiration for this cpu */
- } cpu[1];
+ } cpu[];
};
/*
@@ -84,10 +85,8 @@ static void uv_rtc_send_IPI(int cpu)
/* Check for an RTC interrupt pending */
static int uv_intr_pending(int pnode)
{
- if (is_uvx_hub())
- return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) &
- UVXH_EVENT_OCCURRED2_RTC_1_MASK;
- return 0;
+ return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED2) &
+ UVH_EVENT_OCCURRED2_RTC_1_MASK;
}
/* Setup interrupt and return non-zero if early expiration occurred. */
@@ -101,8 +100,8 @@ static int uv_setup_intr(int cpu, u64 expires)
UVH_RTC1_INT_CONFIG_M_MASK);
uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
- uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS,
- UVXH_EVENT_OCCURRED2_RTC_1_MASK);
+ uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED2_ALIAS,
+ UVH_EVENT_OCCURRED2_RTC_1_MASK);
val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
@@ -148,9 +147,8 @@ static __init int uv_rtc_allocate_timers(void)
struct uv_rtc_timer_head *head = blade_info[bid];
if (!head) {
- head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
- (uv_blade_nr_possible_cpus(bid) *
- 2 * sizeof(u64)),
+ head = kmalloc_node(struct_size(head, cpu,
+ uv_blade_nr_possible_cpus(bid)),
GFP_KERNEL, nid);
if (!head) {
uv_rtc_deallocate_timers();
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 2961234d0795..7b37a412f829 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -14,9 +14,9 @@
#include "../boot/string.h"
-u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory");
-struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(".kexec-purgatory");
static int verify_sha256_digest(void)
{
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 1ed1208931e0..22fda7d99159 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -9,6 +9,7 @@
#include <asm/realmode.h>
#include <asm/tlbflush.h>
#include <asm/crash.h>
+#include <asm/sev-es.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -38,6 +39,25 @@ void __init reserve_real_mode(void)
crash_reserve_low_1M();
}
+static void sme_sev_setup_real_mode(struct trampoline_header *th)
+{
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ if (sme_active())
+ th->flags |= TH_FLAGS_SME_ACTIVE;
+
+ if (sev_es_active()) {
+ /*
+ * Skip the call to verify_cpu() in secondary_startup_64 as it
+ * will cause #VC exceptions when the AP can't handle them yet.
+ */
+ th->start = (u64) secondary_startup_64_no_verify;
+
+ if (sev_es_setup_ap_jump_table(real_mode_header))
+ panic("Failed to get/update SEV-ES AP Jump Table");
+ }
+#endif
+}
+
static void __init setup_real_mode(void)
{
u16 real_mode_seg;
@@ -104,13 +124,13 @@ static void __init setup_real_mode(void)
*trampoline_cr4_features = mmu_cr4_features;
trampoline_header->flags = 0;
- if (sme_active())
- trampoline_header->flags |= TH_FLAGS_SME_ACTIVE;
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
trampoline_pgd[511] = init_top_pgt[511].pgd;
#endif
+
+ sme_sev_setup_real_mode(trampoline_header);
}
/*
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index af04512c02d9..8c1db5bf5d78 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -20,6 +20,9 @@ SYM_DATA_START(real_mode_header)
/* SMP trampoline */
.long pa_trampoline_start
.long pa_trampoline_header
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ .long pa_sev_es_trampoline_start
+#endif
#ifdef CONFIG_X86_64
.long pa_trampoline_pgd;
#endif
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 251758ed7443..84c5d1b33d10 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -56,6 +56,7 @@ SYM_CODE_START(trampoline_start)
testl %eax, %eax # Check for return code
jnz no_longmode
+.Lswitch_to_protected:
/*
* GDT tables in non default location kernel can be beyond 16MB and
* lgdt will not be able to load the address as in real mode default
@@ -80,6 +81,25 @@ no_longmode:
jmp no_longmode
SYM_CODE_END(trampoline_start)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+/* SEV-ES supports non-zero IP for entry points - no alignment needed */
+SYM_CODE_START(sev_es_trampoline_start)
+ cli # We should be safe anyway
+
+ LJMPW_RM(1f)
+1:
+ mov %cs, %ax # Code and data in the same place
+ mov %ax, %ds
+ mov %ax, %es
+ mov %ax, %ss
+
+ # Setup stack
+ movl $rm_stack_end, %esp
+
+ jmp .Lswitch_to_protected
+SYM_CODE_END(sev_es_trampoline_start)
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
+
#include "../kernel/verify_cpu.S"
.section ".text32","ax"
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index a42015b305f4..af38469afd14 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -362,6 +362,9 @@ function convert_operands(count,opnd, i,j,imm,mod)
END {
if (awkchecked != "")
exit 1
+
+ print "#ifndef __BOOT_COMPRESSED\n"
+
# print escape opcode map's array
print "/* Escape opcode map array */"
print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
@@ -388,6 +391,51 @@ END {
for (j = 0; j < max_lprefix; j++)
if (atable[i,j])
print " ["i"]["j"] = "atable[i,j]","
- print "};"
+ print "};\n"
+
+ print "#else /* !__BOOT_COMPRESSED */\n"
+
+ print "/* Escape opcode map array */"
+ print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* Group opcode map array */"
+ print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* AVX opcode map array */"
+ print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "static void inat_init_tables(void)"
+ print "{"
+
+ # print escape opcode map's array
+ print "\t/* Print Escape opcode map array */"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";"
+ print ""
+
+ # print group opcode map's array
+ print "\t/* Print Group opcode map array */"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";"
+ print ""
+ # print AVX opcode map's array
+ print "\t/* Print AVX opcode map array */"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+
+ print "}"
+ print "#endif"
}
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index ff6bba2c8ab6..b07824500363 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -20,22 +20,6 @@
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-/*
- * Note: when you get a NULL pointer exception here this means someone
- * passed in an incorrect kernel address to one of these functions.
- *
- * If you use these functions directly please don't forget the
- * access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
- int len, __wsum sum)
-{
- memcpy(dst, src, len);
- return csum_partial(dst, len, sum);
-}
-
/**
* csum_fold - Fold and invert a 32bit checksum.
* sum: 32bit unfolded sum
diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h
index b9ac7c9eb72c..0b13c2947ad1 100644
--- a/arch/x86/um/asm/checksum_32.h
+++ b/arch/x86/um/asm/checksum_32.h
@@ -35,27 +35,4 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
return csum_fold(sum);
}
-/*
- * Copy and checksum to user
- */
-#define HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user(const void *src,
- void __user *dst,
- int len, __wsum sum, int *err_ptr)
-{
- if (access_ok(dst, len)) {
- if (copy_to_user(dst, src, len)) {
- *err_ptr = -EFAULT;
- return (__force __wsum)-1;
- }
-
- return csum_partial(src, len, sum);
- }
-
- if (len)
- *err_ptr = -EFAULT;
-
- return (__force __wsum)-1; /* invalid checksum */
-}
-
#endif
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index 09a085bde0d4..1401899dee9b 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -52,14 +52,6 @@ static const int reg_offsets[] =
int putreg(struct task_struct *child, int regno, unsigned long value)
{
-#ifdef TIF_IA32
- /*
- * Some code in the 64bit emulation may not be 64bit clean.
- * Don't take any chances.
- */
- if (test_tsk_thread_flag(child, TIF_IA32))
- value &= 0xffffffff;
-#endif
switch (regno) {
case R8:
case R9:
@@ -137,10 +129,7 @@ int poke_user(struct task_struct *child, long addr, long data)
unsigned long getreg(struct task_struct *child, int regno)
{
unsigned long mask = ~0UL;
-#ifdef TIF_IA32
- if (test_tsk_thread_flag(child, TIF_IA32))
- mask = 0xffffffff;
-#endif
+
switch (regno) {
case R8:
case R9:
diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c
index c51dd8363d25..bae61554abcc 100644
--- a/arch/x86/um/user-offsets.c
+++ b/arch/x86/um/user-offsets.c
@@ -2,7 +2,7 @@
#include <stdio.h>
#include <stddef.h>
#include <signal.h>
-#include <sys/poll.h>
+#include <poll.h>
#include <sys/mman.h>
#include <sys/user.h>
#define __FRAME_OFFSETS
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 205b1176084f..aa9f50fccc5d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
* NB: needs to live in .data because it's used by xen_prepare_pvh which runs
* before clearing the bss.
*/
-uint32_t xen_start_flags __attribute__((section(".data"))) = 0;
+uint32_t xen_start_flags __section(".data") = 0;
EXPORT_SYMBOL(xen_start_flags);
/*
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 22e741e0b10c..4409306364dc 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -32,7 +32,7 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
-#include <linux/frame.h>
+#include <linux/objtool.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -1014,8 +1014,6 @@ void __init xen_setup_vcpu_info_placement(void)
}
static const struct pv_info xen_info __initconst = {
- .shared_kernel_pmd = 0,
-
.extra_user_64bit_cs = FLAT_USER_CS64,
.name = "Xen",
};
@@ -1302,7 +1300,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
* any NUMA information the kernel tries to get from ACPI will
* be meaningless. Prevent it from trying.
*/
- acpi_numa = -1;
+ disable_srat();
#endif
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
@@ -1314,10 +1312,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_start_info->nr_pages);
xen_reserve_special_pages();
- /* keep using Xen gdt for now; no urgent need to change it */
-
- pv_info.kernel_rpl = 0;
-
/*
* We used to do this in xen_arch_setup, but that is too late
* on AMD were early_cpu_init (run before ->arch_setup()) calls
@@ -1376,6 +1370,15 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
xen_boot_params_init_edd();
+
+#ifdef CONFIG_ACPI
+ /*
+ * Disable selecting "Firmware First mode" for correctable
+ * memory errors, as this is the duty of the hypervisor to
+ * decide.
+ */
+ acpi_disable_cmcff = 1;
+#endif
}
if (!boot_params.screen_info.orig_video_isVGA)
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 80a79db72fcf..0d5e34b9e6f9 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -21,7 +21,7 @@
* The variable xen_pvh needs to live in the data segment since it is used
* after startup_{32|64} is invoked, which will clear the .bss segment.
*/
-bool xen_pvh __attribute__((section(".data"))) = 0;
+bool xen_pvh __section(".data") = 0;
void __init xen_pvh_init(struct boot_params *boot_params)
{
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 4988e19598c8..1e681bf62561 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -25,6 +25,7 @@
static struct gnttab_vm_area {
struct vm_struct *area;
pte_t **ptes;
+ int idx;
} gnttab_shared_vm_area, gnttab_status_vm_area;
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
@@ -90,19 +91,31 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
}
}
+static int gnttab_apply(pte_t *pte, unsigned long addr, void *data)
+{
+ struct gnttab_vm_area *area = data;
+
+ area->ptes[area->idx++] = pte;
+ return 0;
+}
+
static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames)
{
area->ptes = kmalloc_array(nr_frames, sizeof(*area->ptes), GFP_KERNEL);
if (area->ptes == NULL)
return -ENOMEM;
-
- area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes);
- if (area->area == NULL) {
- kfree(area->ptes);
- return -ENOMEM;
- }
-
+ area->area = get_vm_area(PAGE_SIZE * nr_frames, VM_IOREMAP);
+ if (!area->area)
+ goto out_free_ptes;
+ if (apply_to_page_range(&init_mm, (unsigned long)area->area->addr,
+ PAGE_SIZE * nr_frames, gnttab_apply, area))
+ goto out_free_vm_area;
return 0;
+out_free_vm_area:
+ free_vm_area(area->area);
+out_free_ptes:
+ kfree(area->ptes);
+ return -ENOMEM;
}
static void arch_gnttab_vfree(struct gnttab_vm_area *area)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 3273c985d3dd..cf2ade864c30 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -285,13 +285,6 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
__xen_set_pte(ptep, pteval);
}
-static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval)
-{
- trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
- __xen_set_pte(ptep, pteval);
-}
-
pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
@@ -1149,7 +1142,7 @@ static void __init xen_pagetable_p2m_free(void)
* We could be in __ka space.
* We roundup to the PMD, which means that if anybody at this stage is
* using the __ka address of xen_start_info or
- * xen_start_info->shared_info they are in going to crash. Fortunatly
+ * xen_start_info->shared_info they are in going to crash. Fortunately
* we have already revectored in xen_setup_kernel_pagetable.
*/
size = roundup(size, PMD_SIZE);
@@ -2105,7 +2098,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.release_pmd = xen_release_pmd_init,
.set_pte = xen_set_pte_init,
- .set_pte_at = xen_set_pte_at,
.set_pmd = xen_set_pmd_hyper,
.ptep_modify_prot_start = __ptep_modify_prot_start,
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 33293ce01d8d..19ae3e4fe4e9 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -2,7 +2,7 @@
/* Glue code to lib/swiotlb-xen.c */
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/pci.h>
#include <xen/swiotlb-xen.h>
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 2097fa0ebdb5..c1b2f764b29a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -88,14 +88,17 @@ int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_callfunc_irq, cpu).irq = rc;
per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
- debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
- rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
- IRQF_PERCPU | IRQF_NOBALANCING,
- debug_name, NULL);
- if (rc < 0)
- goto fail;
- per_cpu(xen_debug_irq, cpu).irq = rc;
- per_cpu(xen_debug_irq, cpu).name = debug_name;
+ if (!xen_fifo_events) {
+ debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+ rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
+ xen_debug_interrupt,
+ IRQF_PERCPU | IRQF_NOBALANCING,
+ debug_name, NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(xen_debug_irq, cpu).irq = rc;
+ per_cpu(xen_debug_irq, cpu).name = debug_name;
+ }
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 45d556f71858..9546c3384c75 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -29,6 +29,8 @@ extern struct start_info *xen_start_info;
extern struct shared_info xen_dummy_shared_info;
extern struct shared_info *HYPERVISOR_shared_info;
+extern bool xen_fifo_events;
+
void xen_setup_mfn_list_list(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);