summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/fault.c52
-rw-r--r--arch/x86/mm/highmem_32.c1
-rw-r--r--arch/x86/mm/init.c25
-rw-r--r--arch/x86/mm/init_32.c17
-rw-r--r--arch/x86/mm/kaslr.c8
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c19
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c4
-rw-r--r--arch/x86/mm/pat/set_memory.c6
-rw-r--r--arch/x86/mm/pgtable.c24
9 files changed, 66 insertions, 90 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4399983c50c..e8711b2cafaf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -880,12 +880,6 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
}
-static noinline void
-bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
-{
- __bad_area(regs, error_code, address, 0, SEGV_MAPERR);
-}
-
static inline bool bad_area_access_from_pkeys(unsigned long error_code,
struct vm_area_struct *vma)
{
@@ -1366,51 +1360,10 @@ void do_user_addr_fault(struct pt_regs *regs,
lock_mmap:
#endif /* CONFIG_PER_VMA_LOCK */
- /*
- * Kernel-mode access to the user address space should only occur
- * on well-defined single instructions listed in the exception
- * tables. But, an erroneous kernel fault occurring outside one of
- * those areas which also holds mmap_lock might deadlock attempting
- * to validate the fault against the address space.
- *
- * Only do the expensive exception table search when we might be at
- * risk of a deadlock. This happens if we
- * 1. Failed to acquire mmap_lock, and
- * 2. The access did not originate in userspace.
- */
- if (unlikely(!mmap_read_trylock(mm))) {
- if (!user_mode(regs) && !search_exception_tables(regs->ip)) {
- /*
- * Fault from code in kernel from
- * which we do not expect faults.
- */
- bad_area_nosemaphore(regs, error_code, address);
- return;
- }
retry:
- mmap_read_lock(mm);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case we'll have missed the might_sleep() from
- * down_read():
- */
- might_sleep();
- }
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma)) {
- bad_area(regs, error_code, address);
- return;
- }
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, error_code, address);
- return;
- }
- if (unlikely(expand_stack(vma, address))) {
- bad_area(regs, error_code, address);
+ bad_area_nosemaphore(regs, error_code, address);
return;
}
@@ -1418,7 +1371,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address, vma);
return;
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 2c54b76d8f84..d9efa35711ee 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/swap.h> /* for totalram_pages */
#include <linux/memblock.h>
+#include <asm/numa.h>
void __init set_highmem_pages_init(void)
{
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 3cdac0f0055d..8192452d1d2d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -9,6 +9,7 @@
#include <linux/sched/task.h>
#include <asm/set_memory.h>
+#include <asm/cpu_device_id.h>
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/page.h>
@@ -261,6 +262,24 @@ static void __init probe_page_size_mask(void)
}
}
+#define INTEL_MATCH(_model) { .vendor = X86_VENDOR_INTEL, \
+ .family = 6, \
+ .model = _model, \
+ }
+/*
+ * INVLPG may not properly flush Global entries
+ * on these CPUs when PCIDs are enabled.
+ */
+static const struct x86_cpu_id invlpg_miss_ids[] = {
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
+ {}
+};
+
static void setup_pcid(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
@@ -269,6 +288,12 @@ static void setup_pcid(void)
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
+ if (x86_match_cpu(invlpg_miss_ids)) {
+ pr_info("Incomplete global flushes, disabling PCID");
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ return;
+ }
+
if (boot_cpu_has(X86_FEATURE_PGE)) {
/*
* This can't be cr4_set_bits_and_update_boot() -- the
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d4e2648a1dfb..b63403d7179d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -45,7 +45,6 @@
#include <asm/olpc_ofw.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
-#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/page_types.h>
@@ -74,7 +73,6 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
pmd_table = (pmd_t *)alloc_low_page();
- paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
p4d = p4d_offset(pgd, 0);
pud = pud_offset(p4d, 0);
@@ -99,7 +97,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
pte_t *page_table = (pte_t *)alloc_low_page();
- paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}
@@ -181,12 +178,10 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
set_pte(newpte + i, pte[i]);
*adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
- paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
BUG_ON(newpte != pte_offset_kernel(pmd, 0));
__flush_tlb_all();
- paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
pte = newpte;
}
BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
@@ -482,7 +477,6 @@ void __init native_pagetable_init(void)
pfn, pmd, __pa(pmd), pte, __pa(pte));
pte_clear(NULL, va, pte);
}
- paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
paging_init();
}
@@ -491,15 +485,8 @@ void __init native_pagetable_init(void)
* point, we've been running on some set of pagetables constructed by
* the boot process.
*
- * If we're booting on native hardware, this will be a pagetable
- * constructed in arch/x86/kernel/head_32.S. The root of the
- * pagetable will be swapper_pg_dir.
- *
- * If we're booting paravirtualized under a hypervisor, then there are
- * more options: we may already be running PAE, and the pagetable may
- * or may not be based in swapper_pg_dir. In any case,
- * paravirt_pagetable_init() will set up swapper_pg_dir
- * appropriately for the rest of the initialization to work.
+ * This will be a pagetable constructed in arch/x86/kernel/head_32.S.
+ * The root of the pagetable will be swapper_pg_dir.
*
* In general, pagetable_init() assumes that the pagetable may already
* be partially populated, and so it avoids stomping on any existing
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 557f0fe25dff..37db264866b6 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void)
set_p4d(p4d_tramp,
__p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ trampoline_pgd_entry =
+ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp));
} else {
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+ trampoline_pgd_entry =
+ __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
}
}
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index e0b51c09109f..54bbd5163e8d 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
#endif
}
-static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
/*
* To maintain the security guarantees of SEV-SNP guests, make sure
@@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
*/
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
snp_set_memory_shared(vaddr, npages);
+
+ return true;
}
/* Return true unconditionally: return value doesn't matter for the SEV side */
@@ -501,6 +503,21 @@ void __init sme_early_init(void)
x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
+
+ /*
+ * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
+ * parallel bringup low level code. That raises #VC which cannot be
+ * handled there.
+ * It does not provide a RDMSR GHCB protocol so the early startup
+ * code cannot directly communicate with the secure firmware. The
+ * alternative solution to retrieve the APIC ID via CPUID(0xb),
+ * which is covered by the GHCB protocol, is not viable either
+ * because there is no enforcement of the CPUID(0xb) provided
+ * "initial" APIC ID to be the same as the real APIC ID.
+ * Disable parallel bootup.
+ */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
+ x86_cpuinit.parallel_bringup = false;
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index c6efcf559d88..d73aeb16417f 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -188,7 +188,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
if (pmd_large(*pmd))
return;
- pte = pte_offset_map(pmd, ppd->vaddr);
+ pte = pte_offset_kernel(pmd, ppd->vaddr);
if (pte_none(*pte))
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
}
@@ -612,7 +612,7 @@ void __init sme_enable(struct boot_params *bp)
out:
if (sme_me_mask) {
physical_mask &= ~sme_me_mask;
- cc_set_vendor(CC_VENDOR_AMD);
+ cc_vendor = CC_VENDOR_AMD;
cc_set_mask(sme_me_mask);
}
}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 7159cf787613..df4182b6449f 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>
@@ -231,7 +232,7 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
* points to #2, but almost all physical-to-virtual translations point to #1.
*
* This is so that we can have both a directmap of all physical memory *and*
- * take full advantage of the the limited (s32) immediate addressing range (2G)
+ * take full advantage of the limited (s32) immediate addressing range (2G)
* of x86_64.
*
* See Documentation/arch/x86/x86_64/mm.rst for more detail.
@@ -2151,7 +2152,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
/* Notify hypervisor that we are about to set/clr encryption attribute. */
- x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
+ if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
+ return -EIO;
ret = __change_page_attr_set_clr(&cpa, 1);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e4f499eb0f29..15a8009a4480 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -702,14 +702,8 @@ void p4d_clear_huge(p4d_t *p4d)
* pud_set_huge - setup kernel PUD mapping
*
* MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
- * function sets up a huge page only if any of the following conditions are met:
- *
- * - MTRRs are disabled, or
- *
- * - MTRRs are enabled and the range is completely covered by a single MTRR, or
- *
- * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
- * has no effect on the requested PAT memory type.
+ * function sets up a huge page only if the complete range has the same MTRR
+ * caching mode.
*
* Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
* page mapping attempt fails.
@@ -718,11 +712,10 @@ void p4d_clear_huge(p4d_t *p4d)
*/
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr, uniform;
+ u8 uniform;
- mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
- if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
- (mtrr != MTRR_TYPE_WRBACK))
+ mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
+ if (!uniform)
return 0;
/* Bail out if we are we on a populated non-leaf entry: */
@@ -745,11 +738,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
*/
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr, uniform;
+ u8 uniform;
- mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
- if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
- (mtrr != MTRR_TYPE_WRBACK)) {
+ mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
+ if (!uniform) {
pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
__func__, addr, addr + PMD_SIZE);
return 0;