summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2023-02-15 20:33:28 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2023-02-15 20:33:28 +0300
commit33436335e93a1788a58443fc99c5ab320ce4b9d9 (patch)
treed92f88768c8dbd00f8b65164f47a8a091768b95a /arch/x86
parent27b025ebb0f6092d5c0a88de2ab73545bc1c496e (diff)
parentc39cea6f38eefe356d64d0bc1e1f2267e282cdd3 (diff)
downloadlinux-33436335e93a1788a58443fc99c5ab320ce4b9d9.tar.xz
Merge tag 'kvm-riscv-6.3-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv changes for 6.3 - Fix wrong usage of PGDIR_SIZE to check page sizes - Fix privilege mode setting in kvm_riscv_vcpu_trap_redirect() - Redirect illegal instruction traps to guest - SBI PMU support for guest
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/bioscall.S4
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c6
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/boot/compressed/sev.c70
-rw-r--r--arch/x86/events/intel/core.c1
-rw-r--r--arch/x86/events/intel/cstate.c22
-rw-r--r--arch/x86/events/intel/uncore.c1
-rw-r--r--arch/x86/events/msr.c3
-rw-r--r--arch/x86/include/asm/acpi.h8
-rw-r--r--arch/x86/include/asm/debugreg.h26
-rw-r--r--arch/x86/include/asm/msr-index.h20
-rw-r--r--arch/x86/include/uapi/asm/svm.h6
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c9
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c49
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c12
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kvm/vmx/vmx.c21
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/pat/memtype.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c44
-rw-r--r--arch/x86/pci/xen.c2
-rw-r--r--arch/x86/um/elfcore.c4
-rw-r--r--arch/x86/xen/p2m.c5
25 files changed, 271 insertions, 58 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9cf07322875a..73ed982d4100 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -14,13 +14,13 @@ endif
ifdef CONFIG_CC_IS_GCC
RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
-RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix)
RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register)
endif
ifdef CONFIG_CC_IS_CLANG
RETPOLINE_CFLAGS := -mretpoline-external-thunk
RETPOLINE_VDSO_CFLAGS := -mretpoline
endif
+RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix)
ifdef CONFIG_RETHUNK
RETHUNK_CFLAGS := -mfunction-return=thunk-extern
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 5521ea12f44e..aa9b96457584 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -32,7 +32,7 @@ intcall:
movw %dx, %si
movw %sp, %di
movw $11, %cx
- rep; movsd
+ rep; movsl
/* Pop full state from the stack */
popal
@@ -67,7 +67,7 @@ intcall:
jz 4f
movw %sp, %si
movw $11, %cx
- rep; movsd
+ rep; movsl
4: addw $44, %sp
/* Restore state and return */
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index d4a314cc50d6..321a5011042d 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -180,6 +180,12 @@ void initialize_identity_maps(void *rmode)
/* Load the new page-table. */
write_cr3(top_level_pgt);
+
+ /*
+ * Now that the required page table mappings are established and a
+ * GHCB can be used, check for SNP guest/HV feature compatibility.
+ */
+ snp_check_features();
}
static pte_t *split_large_pmd(struct x86_mapping_info *info,
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 62208ec04ca4..20118fb7c53b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -126,6 +126,7 @@ static inline void console_init(void)
#ifdef CONFIG_AMD_MEM_ENCRYPT
void sev_enable(struct boot_params *bp);
+void snp_check_features(void);
void sev_es_shutdown_ghcb(void);
extern bool sev_es_check_ghcb_fault(unsigned long address);
void snp_set_page_private(unsigned long paddr);
@@ -143,6 +144,7 @@ static inline void sev_enable(struct boot_params *bp)
if (bp)
bp->cc_blob_address = 0;
}
+static inline void snp_check_features(void) { }
static inline void sev_es_shutdown_ghcb(void) { }
static inline bool sev_es_check_ghcb_fault(unsigned long address)
{
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index c93930d5ccbd..d63ad8f99f83 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -208,6 +208,23 @@ void sev_es_shutdown_ghcb(void)
error("Can't unmap GHCB page");
}
+static void __noreturn sev_es_ghcb_terminate(struct ghcb *ghcb, unsigned int set,
+ unsigned int reason, u64 exit_info_2)
+{
+ u64 exit_info_1 = SVM_VMGEXIT_TERM_REASON(set, reason);
+
+ vc_ghcb_invalidate(ghcb);
+ ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_TERM_REQUEST);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ sev_es_wr_ghcb_msr(__pa(ghcb));
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
bool sev_es_check_ghcb_fault(unsigned long address)
{
/* Check whether the fault was on the GHCB page */
@@ -270,6 +287,59 @@ static void enforce_vmpl0(void)
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0);
}
+/*
+ * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need
+ * guest side implementation for proper functioning of the guest. If any
+ * of these features are enabled in the hypervisor but are lacking guest
+ * side implementation, the behavior of the guest will be undefined. The
+ * guest could fail in non-obvious way making it difficult to debug.
+ *
+ * As the behavior of reserved feature bits is unknown to be on the
+ * safe side add them to the required features mask.
+ */
+#define SNP_FEATURES_IMPL_REQ (MSR_AMD64_SNP_VTOM | \
+ MSR_AMD64_SNP_REFLECT_VC | \
+ MSR_AMD64_SNP_RESTRICTED_INJ | \
+ MSR_AMD64_SNP_ALT_INJ | \
+ MSR_AMD64_SNP_DEBUG_SWAP | \
+ MSR_AMD64_SNP_VMPL_SSS | \
+ MSR_AMD64_SNP_SECURE_TSC | \
+ MSR_AMD64_SNP_VMGEXIT_PARAM | \
+ MSR_AMD64_SNP_VMSA_REG_PROTECTION | \
+ MSR_AMD64_SNP_RESERVED_BIT13 | \
+ MSR_AMD64_SNP_RESERVED_BIT15 | \
+ MSR_AMD64_SNP_RESERVED_MASK)
+
+/*
+ * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented
+ * by the guest kernel. As and when a new feature is implemented in the
+ * guest kernel, a corresponding bit should be added to the mask.
+ */
+#define SNP_FEATURES_PRESENT (0)
+
+void snp_check_features(void)
+{
+ u64 unsupported;
+
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
+ return;
+
+ /*
+ * Terminate the boot if hypervisor has enabled any feature lacking
+ * guest side implementation. Pass on the unsupported features mask through
+ * EXIT_INFO_2 of the GHCB protocol so that those features can be reported
+ * as part of the guest boot failure.
+ */
+ unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+ if (unsupported) {
+ if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
+ sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
+
+ sev_es_ghcb_terminate(boot_ghcb, SEV_TERM_SET_GEN,
+ GHCB_SNP_UNSUPPORTED, unsupported);
+ }
+}
+
void sev_enable(struct boot_params *bp)
{
unsigned int eax, ebx, ecx, edx;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa53d042b943..73bd0d70817e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6339,6 +6339,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
pmem = true;
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index a2834bc93149..551741e79e03 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -41,6 +41,7 @@
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
* Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
+ * MTL
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
@@ -51,50 +52,50 @@
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL,SPR
+ * TGL,TNT,RKL,ADL,RPL,SPR,MTL
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
- * ICL,TGL,RKL,ADL,RPL
+ * ICL,TGL,RKL,ADL,RPL,MTL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
* KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
- * RPL,SPR
+ * RPL,SPR,MTL
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
- * ADL,RPL
+ * ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
- * TGL,TNT,RKL,ADL,RPL,SPR
+ * TGL,TNT,RKL,ADL,RPL,SPR,MTL
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
- * KBL,CML,ICL,TGL,RKL,ADL,RPL
+ * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL,RPL
+ * ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
- * ADL,RPL
+ * ADL,RPL,MTL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
- * TNT,RKL,ADL,RPL
+ * TNT,RKL,ADL,RPL,MTL
* Scope: Package (physical package)
*
*/
@@ -676,6 +677,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &icx_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
@@ -686,6 +688,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 6f1ccc57a692..459b1aafd4d4 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1833,6 +1833,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
{},
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index ecced3a52668..c65d8906cbcf 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_BROADWELL_G:
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
case INTEL_FAM6_ATOM_SILVERMONT:
case INTEL_FAM6_ATOM_SILVERMONT_D:
@@ -107,6 +108,8 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
+ case INTEL_FAM6_METEORLAKE:
+ case INTEL_FAM6_METEORLAKE_L:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 65064d9f7fa6..8eb74cf386db 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -14,6 +14,7 @@
#include <asm/mmu.h>
#include <asm/mpspec.h>
#include <asm/x86_init.h>
+#include <asm/cpufeature.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@@ -63,6 +64,13 @@ extern int (*acpi_suspend_lowlevel)(void);
/* Physical address to resume after wakeup */
unsigned long acpi_get_wakeup_address(void);
+static inline bool acpi_skip_set_wakeup_address(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+
+#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
+
/*
* Check if the CPU can handle C2 and deeper
*/
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index b049d950612f..ca97442e8d49 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -39,7 +39,20 @@ static __always_inline unsigned long native_get_debugreg(int regno)
asm("mov %%db6, %0" :"=r" (val));
break;
case 7:
- asm("mov %%db7, %0" :"=r" (val));
+ /*
+ * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them
+ * with other code.
+ *
+ * This is needed because a DR7 access can cause a #VC exception
+ * when running under SEV-ES. Taking a #VC exception is not a
+ * safe thing to do just anywhere in the entry code and
+ * re-ordering might place the access into an unsafe location.
+ *
+ * This happened in the NMI handler, where the DR7 read was
+ * re-ordered to happen before the call to sev_es_ist_enter(),
+ * causing stack recursion.
+ */
+ asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER);
break;
default:
BUG();
@@ -66,7 +79,16 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
asm("mov %0, %%db6" ::"r" (value));
break;
case 7:
- asm("mov %0, %%db7" ::"r" (value));
+ /*
+ * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them
+ * with other code.
+ *
+ * While is didn't happen with a DR7 write (see the DR7 read
+ * comment above which explains where it happened), add the
+ * __FORCE_ORDER here too to avoid similar problems in the
+ * future.
+ */
+ asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER);
break;
default:
BUG();
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 37ff47552bcb..d3fe82c5d6b6 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -566,6 +566,26 @@
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
+/* SNP feature bits enabled by the hypervisor */
+#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
+#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
+#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
+
+/* SNP feature bits reserved for future use. */
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
+
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
/* AMD Collaborative Processor Performance Control MSRs */
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index f69c168391aa..80e1df482337 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -116,6 +116,12 @@
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
+#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
+#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
+ /* SW_EXITINFO1[3:0] */ \
+ (((((u64)reason_set) & 0xf)) | \
+ /* SW_EXITINFO1[11:4] */ \
+ ((((u64)reason_code) & 0xff) << 4))
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
/* Exit code reserved for hypervisor/software use */
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 1f60a2b27936..fdbb5f07448f 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -330,7 +330,16 @@ static void __init bp_init_freq_invariance(void)
static void disable_freq_invariance_workfn(struct work_struct *work)
{
+ int cpu;
+
static_branch_disable(&arch_scale_freq_key);
+
+ /*
+ * Set arch_freq_scale to a default value on all cpus
+ * This negates the effect of scaling
+ */
+ for_each_possible_cpu(cpu)
+ per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE;
}
static DECLARE_WORK(disable_freq_invariance_work,
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index efe0c30d3a12..77538abeb72a 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid)
return entry;
}
+static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
+{
+ u64 msr_val;
+
+ /*
+ * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
+ * with a valid event code for supported resource type and the bits
+ * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
+ * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
+ * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
+ * are error bits.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+ if (msr_val & RMID_VAL_ERROR)
+ return -EIO;
+ if (msr_val & RMID_VAL_UNAVAIL)
+ return -EINVAL;
+
+ *val = msr_val;
+ return 0;
+}
+
static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
u32 rmid,
enum resctrl_event_id eventid)
@@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
struct arch_mbm_state *am;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
- if (am)
+ if (am) {
memset(am, 0, sizeof(*am));
+
+ /* Record any initial, non-zero count value. */
+ __rmid_read(rmid, eventid, &am->prev_msr);
+ }
}
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
@@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct arch_mbm_state *am;
u64 msr_val, chunks;
+ int ret;
if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
return -EINVAL;
- /*
- * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
- * with a valid event code for supported resource type and the bits
- * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
- * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
- * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
- * are error bits.
- */
- wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
- rdmsrl(MSR_IA32_QM_CTR, msr_val);
-
- if (msr_val & RMID_VAL_ERROR)
- return -EIO;
- if (msr_val & RMID_VAL_UNAVAIL)
- return -EINVAL;
+ ret = __rmid_read(rmid, eventid, &msr_val);
+ if (ret)
+ return ret;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
if (am) {
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index e5a48f05e787..5993da21d822 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current that will decide if it will be interrupted.
+ * This pairs with the full barrier between the rq->curr update and
+ * resctrl_sched_in() during context switch.
*/
- barrier();
+ smp_mb();
/*
* By now, the task's closid and rmid are set. If the task is current
@@ -2402,6 +2404,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
WRITE_ONCE(t->rmid, to->mon.rmid);
/*
+ * Order the closid/rmid stores above before the loads
+ * in task_curr(). This pairs with the full barrier
+ * between the rq->curr update and resctrl_sched_in()
+ * during context switch.
+ */
+ smp_mb();
+
+ /*
* If the task is on a CPU, set the CPU in the mask.
* The detection is inaccurate as tasks might move or
* schedule before the smp function call takes place.
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 3aa5304200c5..4d8aff05a509 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq)
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
+ irq_set_status_flags(irq, IRQ_LEVEL);
enable_irq(irq);
lapic_assign_legacy_vector(irq, true);
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index beb1bada1b0a..c683666876f1 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -65,8 +65,10 @@ void __init init_ISA_irqs(void)
legacy_pic->init(0);
- for (i = 0; i < nr_legacy_irqs(); i++)
+ for (i = 0; i < nr_legacy_irqs(); i++) {
irq_set_chip_and_handler(i, chip, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
+ }
}
void __init init_IRQ(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0f82c247e1fd..7896acf05117 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3539,18 +3539,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
- if (var->unusable || !var->present)
- ar = 1 << 16;
- else {
- ar = var->type & 15;
- ar |= (var->s & 1) << 4;
- ar |= (var->dpl & 3) << 5;
- ar |= (var->present & 1) << 7;
- ar |= (var->avl & 1) << 12;
- ar |= (var->l & 1) << 13;
- ar |= (var->db & 1) << 14;
- ar |= (var->g & 1) << 15;
- }
+ ar = var->type & 15;
+ ar |= (var->s & 1) << 4;
+ ar |= (var->dpl & 3) << 5;
+ ar |= (var->present & 1) << 7;
+ ar |= (var->avl & 1) << 12;
+ ar |= (var->l & 1) << 13;
+ ar |= (var->db & 1) << 14;
+ ar |= (var->g & 1) << 15;
+ ar |= (var->unusable || !var->present) << 16;
return ar;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d3987359d441..cb258f58fdc8 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -26,6 +26,7 @@
#include <asm/pti.h>
#include <asm/text-patching.h>
#include <asm/memtype.h>
+#include <asm/paravirt.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -804,6 +805,9 @@ void __init poking_init(void)
poking_mm = mm_alloc();
BUG_ON(!poking_mm);
+ /* Xen PV guests need the PGD to be pinned. */
+ paravirt_arch_dup_mmap(NULL, poking_mm);
+
/*
* Randomize the poking address, but make sure that the following page
* will be mapped at the same PMD. We need 2 pages, so find space for 3,
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 46de9cf5c91d..fb4b1b5e0dea 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -387,7 +387,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end,
u8 mtrr_type, uniform;
mtrr_type = mtrr_type_lookup(start, end, &uniform);
- if (mtrr_type != MTRR_TYPE_WRBACK)
+ if (mtrr_type != MTRR_TYPE_WRBACK &&
+ mtrr_type != MTRR_TYPE_INVALID)
return _PAGE_CACHE_MODE_UC_MINUS;
return _PAGE_CACHE_MODE_WB;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 758cbfe55daa..4b3efaa82ab7 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -12,6 +12,7 @@
*/
#include <linux/acpi.h>
+#include <linux/efi.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/bitmap.h>
@@ -442,17 +443,42 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used)
return mcfg_res.flags;
}
+static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used)
+{
+#ifdef CONFIG_EFI
+ efi_memory_desc_t *md;
+ u64 size, mmio_start, mmio_end;
+
+ for_each_efi_memory_desc(md) {
+ if (md->type == EFI_MEMORY_MAPPED_IO) {
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ mmio_start = md->phys_addr;
+ mmio_end = mmio_start + size;
+
+ /*
+ * N.B. Caller supplies (start, start + size),
+ * so to match, mmio_end is the first address
+ * *past* the EFI_MEMORY_MAPPED_IO area.
+ */
+ if (mmio_start <= start && end <= mmio_end)
+ return true;
+ }
+ }
+#endif
+
+ return false;
+}
+
typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type);
static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
struct pci_mmcfg_region *cfg,
- struct device *dev, int with_e820)
+ struct device *dev, const char *method)
{
u64 addr = cfg->res.start;
u64 size = resource_size(&cfg->res);
u64 old_size = size;
int num_buses;
- char *method = with_e820 ? "E820" : "ACPI motherboard resources";
while (!is_reserved(addr, addr + size, E820_TYPE_RESERVED)) {
size >>= 1;
@@ -464,10 +490,10 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
return false;
if (dev)
- dev_info(dev, "MMCONFIG at %pR reserved in %s\n",
+ dev_info(dev, "MMCONFIG at %pR reserved as %s\n",
&cfg->res, method);
else
- pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n",
+ pr_info(PREFIX "MMCONFIG at %pR reserved as %s\n",
&cfg->res, method);
if (old_size != size) {
@@ -500,7 +526,8 @@ static bool __ref
pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early)
{
if (!early && !acpi_disabled) {
- if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0))
+ if (is_mmconf_reserved(is_acpi_reserved, cfg, dev,
+ "ACPI motherboard resource"))
return true;
if (dev)
@@ -513,6 +540,10 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e
"MMCONFIG at %pR not reserved in "
"ACPI motherboard resources\n",
&cfg->res);
+
+ if (is_mmconf_reserved(is_efi_mmio, cfg, dev,
+ "EfiMemoryMappedIO"))
+ return true;
}
/*
@@ -527,7 +558,8 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e
/* Don't try to do this check unless configuration
type 1 is available. how about type 2 ?*/
if (raw_pci_ops)
- return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1);
+ return is_mmconf_reserved(e820__mapped_all, cfg, dev,
+ "E820 entry");
return false;
}
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index b94f727251b6..8babce71915f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -392,6 +392,7 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
msi_for_each_desc(msidesc, &dev->dev, MSI_DESC_ASSOCIATED) {
for (i = 0; i < msidesc->nvec_used; i++)
xen_destroy_irq(msidesc->irq + i);
+ msidesc->irq = 0;
}
}
@@ -433,6 +434,7 @@ static struct msi_domain_ops xen_pci_msi_domain_ops = {
};
static struct msi_domain_info xen_pci_msi_domain_info = {
+ .flags = MSI_FLAG_PCI_MSIX | MSI_FLAG_FREE_MSI_DESCS | MSI_FLAG_DEV_SYSFS,
.ops = &xen_pci_msi_domain_ops,
};
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 48a3eb09d951..650cdbbdaf45 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -7,7 +7,7 @@
#include <asm/elf.h>
-Elf32_Half elf_core_extra_phdrs(void)
+Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
}
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
return 1;
}
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
if ( vsyscall_ehdr ) {
const struct elfhdr *const ehdrp =
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58db86f7b384..9bdc3b656b2c 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -134,11 +134,6 @@ static inline unsigned p2m_mid_index(unsigned long pfn)
return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}
-static inline unsigned p2m_index(unsigned long pfn)
-{
- return pfn % P2M_PER_PAGE;
-}
-
static void p2m_top_mfn_init(unsigned long *top)
{
unsigned i;