summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/parisc/include/asm/cacheflush.h15
-rw-r--r--arch/parisc/include/asm/pgtable.h27
-rw-r--r--arch/parisc/kernel/cache.c413
-rw-r--r--arch/powerpc/include/asm/uaccess.h16
-rw-r--r--arch/powerpc/platforms/85xx/smp.c9
-rw-r--r--arch/riscv/kvm/aia_device.c7
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c4
-rw-r--r--arch/riscv/mm/init.c24
-rw-r--r--arch/riscv/mm/pageattr.c28
-rw-r--r--arch/x86/boot/compressed/Makefile4
-rw-r--r--arch/x86/boot/main.c4
-rw-r--r--arch/x86/include/asm/alternative.h22
-rw-r--r--arch/x86/include/asm/atomic64_32.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/irq_stack.h2
-rw-r--r--arch/x86/include/asm/uaccess.h6
-rw-r--r--arch/x86/kernel/amd_nb.c9
-rw-r--r--arch/x86/kernel/cpu/common.c21
-rw-r--r--arch/x86/kernel/machine_kexec_64.c11
-rw-r--r--arch/x86/kvm/svm/sev.c19
-rw-r--r--arch/x86/kvm/svm/svm.c24
-rw-r--r--arch/x86/kvm/svm/svm.h4
-rw-r--r--arch/x86/lib/getuser.S6
-rw-r--r--arch/x86/mm/numa.c6
24 files changed, 427 insertions, 258 deletions
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index ba4c05bc24d6..8394718870e1 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -31,18 +31,17 @@ void flush_cache_all_local(void);
void flush_cache_all(void);
void flush_cache_mm(struct mm_struct *mm);
-void flush_kernel_dcache_page_addr(const void *addr);
-
#define flush_kernel_dcache_range(start,size) \
flush_kernel_dcache_range_asm((start), (start)+(size));
+/* The only way to flush a vmap range is to flush whole cache */
#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
void flush_kernel_vmap_range(void *vaddr, int size);
void invalidate_kernel_vmap_range(void *vaddr, int size);
-#define flush_cache_vmap(start, end) flush_cache_all()
+void flush_cache_vmap(unsigned long start, unsigned long end);
#define flush_cache_vmap_early(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) flush_cache_all()
+void flush_cache_vunmap(unsigned long start, unsigned long end);
void flush_dcache_folio(struct folio *folio);
#define flush_dcache_folio flush_dcache_folio
@@ -77,17 +76,11 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
-/* defined in pacache.S exported in cache.c used by flush_anon_page */
-void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
-
#define ARCH_HAS_FLUSH_ANON_PAGE
void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
#define ARCH_HAS_FLUSH_ON_KUNMAP
-static inline void kunmap_flush_on_unmap(const void *addr)
-{
- flush_kernel_dcache_page_addr(addr);
-}
+void kunmap_flush_on_unmap(const void *addr);
#endif /* _PARISC_CACHEFLUSH_H */
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 974accac05cd..babf65751e81 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -448,14 +448,17 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return pte;
}
+static inline pte_t ptep_get(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
+}
+#define ptep_get ptep_get
+
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte;
- if (!pte_young(*ptep))
- return 0;
-
- pte = *ptep;
+ pte = ptep_get(ptep);
if (!pte_young(pte)) {
return 0;
}
@@ -463,17 +466,10 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
return 1;
}
-struct mm_struct;
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- pte_t old_pte;
-
- old_pte = *ptep;
- set_pte(ptep, __pte(0));
-
- return old_pte;
-}
+int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
set_pte(ptep, pte_wrprotect(*ptep));
@@ -511,7 +507,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define __HAVE_ARCH_PTE_SAME
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 422f3e1e6d9c..483bfafd930c 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -20,6 +20,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/syscalls.h>
+#include <linux/vmalloc.h>
#include <asm/pdc.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
@@ -31,20 +32,31 @@
#include <asm/mmu_context.h>
#include <asm/cachectl.h>
+#define PTR_PAGE_ALIGN_DOWN(addr) PTR_ALIGN_DOWN(addr, PAGE_SIZE)
+
+/*
+ * When nonzero, use _PAGE_ACCESSED bit to try to reduce the number
+ * of page flushes done flush_cache_page_if_present. There are some
+ * pros and cons in using this option. It may increase the risk of
+ * random segmentation faults.
+ */
+#define CONFIG_FLUSH_PAGE_ACCESSED 0
+
int split_tlb __ro_after_init;
int dcache_stride __ro_after_init;
int icache_stride __ro_after_init;
EXPORT_SYMBOL(dcache_stride);
+/* Internal implementation in arch/parisc/kernel/pacache.S */
void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
EXPORT_SYMBOL(flush_dcache_page_asm);
void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
-
-/* Internal implementation in arch/parisc/kernel/pacache.S */
void flush_data_cache_local(void *); /* flushes local data-cache only */
void flush_instruction_cache_local(void); /* flushes local code-cache only */
+static void flush_kernel_dcache_page_addr(const void *addr);
+
/* On some machines (i.e., ones with the Merced bus), there can be
* only a single PxTLB broadcast at a time; this must be guaranteed
* by software. We need a spinlock around all TLB flushes to ensure
@@ -321,6 +333,18 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
{
if (!static_branch_likely(&parisc_has_cache))
return;
+
+ /*
+ * The TLB is the engine of coherence on parisc. The CPU is
+ * entitled to speculate any page with a TLB mapping, so here
+ * we kill the mapping then flush the page along a special flush
+ * only alias mapping. This guarantees that the page is no-longer
+ * in the cache for any process and nor may it be speculatively
+ * read in (until the user or kernel specifically accesses it,
+ * of course).
+ */
+ flush_tlb_page(vma, vmaddr);
+
preempt_disable();
flush_dcache_page_asm(physaddr, vmaddr);
if (vma->vm_flags & VM_EXEC)
@@ -328,46 +352,44 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
preempt_enable();
}
-static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr)
+static void flush_kernel_dcache_page_addr(const void *addr)
{
- unsigned long flags, space, pgd, prot;
-#ifdef CONFIG_TLB_PTLOCK
- unsigned long pgd_lock;
-#endif
+ unsigned long vaddr = (unsigned long)addr;
+ unsigned long flags;
- vmaddr &= PAGE_MASK;
+ /* Purge TLB entry to remove translation on all CPUs */
+ purge_tlb_start(flags);
+ pdtlb(SR_KERNEL, addr);
+ purge_tlb_end(flags);
+ /* Use tmpalias flush to prevent data cache move-in */
preempt_disable();
+ flush_dcache_page_asm(__pa(vaddr), vaddr);
+ preempt_enable();
+}
- /* Set context for flush */
- local_irq_save(flags);
- prot = mfctl(8);
- space = mfsp(SR_USER);
- pgd = mfctl(25);
-#ifdef CONFIG_TLB_PTLOCK
- pgd_lock = mfctl(28);
-#endif
- switch_mm_irqs_off(NULL, vma->vm_mm, NULL);
- local_irq_restore(flags);
-
- flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
- if (vma->vm_flags & VM_EXEC)
- flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
- flush_tlb_page(vma, vmaddr);
+static void flush_kernel_icache_page_addr(const void *addr)
+{
+ unsigned long vaddr = (unsigned long)addr;
+ unsigned long flags;
- /* Restore previous context */
- local_irq_save(flags);
-#ifdef CONFIG_TLB_PTLOCK
- mtctl(pgd_lock, 28);
-#endif
- mtctl(pgd, 25);
- mtsp(space, SR_USER);
- mtctl(prot, 8);
- local_irq_restore(flags);
+ /* Purge TLB entry to remove translation on all CPUs */
+ purge_tlb_start(flags);
+ pdtlb(SR_KERNEL, addr);
+ purge_tlb_end(flags);
+ /* Use tmpalias flush to prevent instruction cache move-in */
+ preempt_disable();
+ flush_icache_page_asm(__pa(vaddr), vaddr);
preempt_enable();
}
+void kunmap_flush_on_unmap(const void *addr)
+{
+ flush_kernel_dcache_page_addr(addr);
+}
+EXPORT_SYMBOL(kunmap_flush_on_unmap);
+
void flush_icache_pages(struct vm_area_struct *vma, struct page *page,
unsigned int nr)
{
@@ -375,13 +397,16 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page,
for (;;) {
flush_kernel_dcache_page_addr(kaddr);
- flush_kernel_icache_page(kaddr);
+ flush_kernel_icache_page_addr(kaddr);
if (--nr == 0)
break;
kaddr += PAGE_SIZE;
}
}
+/*
+ * Walk page directory for MM to find PTEP pointer for address ADDR.
+ */
static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
{
pte_t *ptep = NULL;
@@ -410,6 +435,41 @@ static inline bool pte_needs_flush(pte_t pte)
== (_PAGE_PRESENT | _PAGE_ACCESSED);
}
+/*
+ * Return user physical address. Returns 0 if page is not present.
+ */
+static inline unsigned long get_upa(struct mm_struct *mm, unsigned long addr)
+{
+ unsigned long flags, space, pgd, prot, pa;
+#ifdef CONFIG_TLB_PTLOCK
+ unsigned long pgd_lock;
+#endif
+
+ /* Save context */
+ local_irq_save(flags);
+ prot = mfctl(8);
+ space = mfsp(SR_USER);
+ pgd = mfctl(25);
+#ifdef CONFIG_TLB_PTLOCK
+ pgd_lock = mfctl(28);
+#endif
+
+ /* Set context for lpa_user */
+ switch_mm_irqs_off(NULL, mm, NULL);
+ pa = lpa_user(addr);
+
+ /* Restore previous context */
+#ifdef CONFIG_TLB_PTLOCK
+ mtctl(pgd_lock, 28);
+#endif
+ mtctl(pgd, 25);
+ mtsp(space, SR_USER);
+ mtctl(prot, 8);
+ local_irq_restore(flags);
+
+ return pa;
+}
+
void flush_dcache_folio(struct folio *folio)
{
struct address_space *mapping = folio_flush_mapping(folio);
@@ -458,50 +518,23 @@ void flush_dcache_folio(struct folio *folio)
if (addr + nr * PAGE_SIZE > vma->vm_end)
nr = (vma->vm_end - addr) / PAGE_SIZE;
- if (parisc_requires_coherency()) {
- for (i = 0; i < nr; i++) {
- pte_t *ptep = get_ptep(vma->vm_mm,
- addr + i * PAGE_SIZE);
- if (!ptep)
- continue;
- if (pte_needs_flush(*ptep))
- flush_user_cache_page(vma,
- addr + i * PAGE_SIZE);
- /* Optimise accesses to the same table? */
- pte_unmap(ptep);
- }
- } else {
+ if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+ != (addr & (SHM_COLOUR - 1))) {
+ for (i = 0; i < nr; i++)
+ __flush_cache_page(vma,
+ addr + i * PAGE_SIZE,
+ (pfn + i) * PAGE_SIZE);
/*
- * The TLB is the engine of coherence on parisc:
- * The CPU is entitled to speculate any page
- * with a TLB mapping, so here we kill the
- * mapping then flush the page along a special
- * flush only alias mapping. This guarantees that
- * the page is no-longer in the cache for any
- * process and nor may it be speculatively read
- * in (until the user or kernel specifically
- * accesses it, of course)
+ * Software is allowed to have any number
+ * of private mappings to a page.
*/
- for (i = 0; i < nr; i++)
- flush_tlb_page(vma, addr + i * PAGE_SIZE);
- if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
- != (addr & (SHM_COLOUR - 1))) {
- for (i = 0; i < nr; i++)
- __flush_cache_page(vma,
- addr + i * PAGE_SIZE,
- (pfn + i) * PAGE_SIZE);
- /*
- * Software is allowed to have any number
- * of private mappings to a page.
- */
- if (!(vma->vm_flags & VM_SHARED))
- continue;
- if (old_addr)
- pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
- old_addr, addr, vma->vm_file);
- if (nr == folio_nr_pages(folio))
- old_addr = addr;
- }
+ if (!(vma->vm_flags & VM_SHARED))
+ continue;
+ if (old_addr)
+ pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
+ old_addr, addr, vma->vm_file);
+ if (nr == folio_nr_pages(folio))
+ old_addr = addr;
}
WARN_ON(++count == 4096);
}
@@ -591,35 +624,28 @@ extern void purge_kernel_dcache_page_asm(unsigned long);
extern void clear_user_page_asm(void *, unsigned long);
extern void copy_user_page_asm(void *, void *, unsigned long);
-void flush_kernel_dcache_page_addr(const void *addr)
-{
- unsigned long flags;
-
- flush_kernel_dcache_page_asm(addr);
- purge_tlb_start(flags);
- pdtlb(SR_KERNEL, addr);
- purge_tlb_end(flags);
-}
-EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
-
static void flush_cache_page_if_present(struct vm_area_struct *vma,
- unsigned long vmaddr, unsigned long pfn)
+ unsigned long vmaddr)
{
+#if CONFIG_FLUSH_PAGE_ACCESSED
bool needs_flush = false;
- pte_t *ptep;
+ pte_t *ptep, pte;
- /*
- * The pte check is racy and sometimes the flush will trigger
- * a non-access TLB miss. Hopefully, the page has already been
- * flushed.
- */
ptep = get_ptep(vma->vm_mm, vmaddr);
if (ptep) {
- needs_flush = pte_needs_flush(*ptep);
+ pte = ptep_get(ptep);
+ needs_flush = pte_needs_flush(pte);
pte_unmap(ptep);
}
if (needs_flush)
- flush_cache_page(vma, vmaddr, pfn);
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(pte_pfn(pte)));
+#else
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long physaddr = get_upa(mm, vmaddr);
+
+ if (physaddr)
+ __flush_cache_page(vma, vmaddr, PAGE_ALIGN_DOWN(physaddr));
+#endif
}
void copy_user_highpage(struct page *to, struct page *from,
@@ -629,7 +655,7 @@ void copy_user_highpage(struct page *to, struct page *from,
kfrom = kmap_local_page(from);
kto = kmap_local_page(to);
- flush_cache_page_if_present(vma, vaddr, page_to_pfn(from));
+ __flush_cache_page(vma, vaddr, PFN_PHYS(page_to_pfn(from)));
copy_page_asm(kto, kfrom);
kunmap_local(kto);
kunmap_local(kfrom);
@@ -638,16 +664,17 @@ void copy_user_highpage(struct page *to, struct page *from,
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long user_vaddr, void *dst, void *src, int len)
{
- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page)));
memcpy(dst, src, len);
- flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len);
+ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(dst));
}
void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long user_vaddr, void *dst, void *src, int len)
{
- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page)));
memcpy(dst, src, len);
+ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(src));
}
/* __flush_tlb_range()
@@ -681,32 +708,10 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- unsigned long addr, pfn;
- pte_t *ptep;
-
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- bool needs_flush = false;
- /*
- * The vma can contain pages that aren't present. Although
- * the pte search is expensive, we need the pte to find the
- * page pfn and to check whether the page should be flushed.
- */
- ptep = get_ptep(vma->vm_mm, addr);
- if (ptep) {
- needs_flush = pte_needs_flush(*ptep);
- pfn = pte_pfn(*ptep);
- pte_unmap(ptep);
- }
- if (needs_flush) {
- if (parisc_requires_coherency()) {
- flush_user_cache_page(vma, addr);
- } else {
- if (WARN_ON(!pfn_valid(pfn)))
- return;
- __flush_cache_page(vma, addr, PFN_PHYS(pfn));
- }
- }
- }
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE)
+ flush_cache_page_if_present(vma, addr);
}
static inline unsigned long mm_total_size(struct mm_struct *mm)
@@ -757,21 +762,19 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
return;
flush_tlb_range(vma, start, end);
- flush_cache_all();
+ if (vma->vm_flags & VM_EXEC)
+ flush_cache_all();
+ else
+ flush_data_cache();
return;
}
- flush_cache_pages(vma, start, end);
+ flush_cache_pages(vma, start & PAGE_MASK, end);
}
void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
{
- if (WARN_ON(!pfn_valid(pfn)))
- return;
- if (parisc_requires_coherency())
- flush_user_cache_page(vma, vmaddr);
- else
- __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
}
void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
@@ -779,34 +782,133 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon
if (!PageAnon(page))
return;
- if (parisc_requires_coherency()) {
- if (vma->vm_flags & VM_SHARED)
- flush_data_cache();
- else
- flush_user_cache_page(vma, vmaddr);
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page)));
+}
+
+int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_t pte = ptep_get(ptep);
+
+ if (!pte_young(pte))
+ return 0;
+ set_pte(ptep, pte_mkold(pte));
+#if CONFIG_FLUSH_PAGE_ACCESSED
+ __flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte)));
+#endif
+ return 1;
+}
+
+/*
+ * After a PTE is cleared, we have no way to flush the cache for
+ * the physical page. On PA8800 and PA8900 processors, these lines
+ * can cause random cache corruption. Thus, we must flush the cache
+ * as well as the TLB when clearing a PTE that's valid.
+ */
+pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep)
+{
+ struct mm_struct *mm = (vma)->vm_mm;
+ pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+ unsigned long pfn = pte_pfn(pte);
+
+ if (pfn_valid(pfn))
+ __flush_cache_page(vma, addr, PFN_PHYS(pfn));
+ else if (pte_accessible(mm, pte))
+ flush_tlb_page(vma, addr);
+
+ return pte;
+}
+
+/*
+ * The physical address for pages in the ioremap case can be obtained
+ * from the vm_struct struct. I wasn't able to successfully handle the
+ * vmalloc and vmap cases. We have an array of struct page pointers in
+ * the uninitialized vmalloc case but the flush failed using page_to_pfn.
+ */
+void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+ unsigned long addr, physaddr;
+ struct vm_struct *vm;
+
+ /* Prevent cache move-in */
+ flush_tlb_kernel_range(start, end);
+
+ if (end - start >= parisc_cache_flush_threshold) {
+ flush_cache_all();
return;
}
- flush_tlb_page(vma, vmaddr);
- preempt_disable();
- flush_dcache_page_asm(page_to_phys(page), vmaddr);
- preempt_enable();
+ if (WARN_ON_ONCE(!is_vmalloc_addr((void *)start))) {
+ flush_cache_all();
+ return;
+ }
+
+ vm = find_vm_area((void *)start);
+ if (WARN_ON_ONCE(!vm)) {
+ flush_cache_all();
+ return;
+ }
+
+ /* The physical addresses of IOREMAP regions are contiguous */
+ if (vm->flags & VM_IOREMAP) {
+ physaddr = vm->phys_addr;
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ preempt_disable();
+ flush_dcache_page_asm(physaddr, start);
+ flush_icache_page_asm(physaddr, start);
+ preempt_enable();
+ physaddr += PAGE_SIZE;
+ }
+ return;
+ }
+
+ flush_cache_all();
}
+EXPORT_SYMBOL(flush_cache_vmap);
+/*
+ * The vm_struct has been retired and the page table is set up. The
+ * last page in the range is a guard page. Its physical address can't
+ * be determined using lpa, so there is no way to flush the range
+ * using flush_dcache_page_asm.
+ */
+void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+ /* Prevent cache move-in */
+ flush_tlb_kernel_range(start, end);
+ flush_data_cache();
+}
+EXPORT_SYMBOL(flush_cache_vunmap);
+
+/*
+ * On systems with PA8800/PA8900 processors, there is no way to flush
+ * a vmap range other than using the architected loop to flush the
+ * entire cache. The page directory is not set up, so we can't use
+ * fdc, etc. FDCE/FICE don't work to flush a portion of the cache.
+ * L2 is physically indexed but FDCE/FICE instructions in virtual
+ * mode output their virtual address on the core bus, not their
+ * real address. As a result, the L2 cache index formed from the
+ * virtual address will most likely not be the same as the L2 index
+ * formed from the real address.
+ */
void flush_kernel_vmap_range(void *vaddr, int size)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- (unsigned long)size >= parisc_cache_flush_threshold) {
- flush_tlb_kernel_range(start, end);
- flush_data_cache();
+ flush_tlb_kernel_range(start, end);
+
+ if (!static_branch_likely(&parisc_has_dcache))
+ return;
+
+ /* If interrupts are disabled, we can only do local flush */
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) {
+ flush_data_cache_local(NULL);
return;
}
- flush_kernel_dcache_range_asm(start, end);
- flush_tlb_kernel_range(start, end);
+ flush_data_cache();
}
EXPORT_SYMBOL(flush_kernel_vmap_range);
@@ -818,15 +920,18 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
/* Ensure DMA is complete */
asm_syncdma();
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- (unsigned long)size >= parisc_cache_flush_threshold) {
- flush_tlb_kernel_range(start, end);
- flush_data_cache();
+ flush_tlb_kernel_range(start, end);
+
+ if (!static_branch_likely(&parisc_has_dcache))
+ return;
+
+ /* If interrupts are disabled, we can only do local flush */
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) {
+ flush_data_cache_local(NULL);
return;
}
- purge_kernel_dcache_range_asm(start, end);
- flush_tlb_kernel_range(start, end);
+ flush_data_cache();
}
EXPORT_SYMBOL(invalidate_kernel_vmap_range);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index de10437fd206..4cba724c8899 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -92,9 +92,25 @@ __pu_failed: \
: label)
#endif
+#ifdef CONFIG_CC_IS_CLANG
+#define DS_FORM_CONSTRAINT "Z<>"
+#else
+#define DS_FORM_CONSTRAINT "YZ<>"
+#endif
+
#ifdef __powerpc64__
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define __put_user_asm2_goto(x, ptr, label) \
__put_user_asm_goto(x, ptr, label, "std")
+#else
+#define __put_user_asm2_goto(x, addr, label) \
+ asm goto ("1: std%U1%X1 %0,%1 # put_user\n" \
+ EX_TABLE(1b, %l2) \
+ : \
+ : "r" (x), DS_FORM_CONSTRAINT (*addr) \
+ : \
+ : label)
+#endif // CONFIG_PPC_KERNEL_PREFIXED
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
asm goto( \
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 40aa58206888..e52b848b64b7 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -398,6 +398,7 @@ static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
hard_irq_disable();
mpic_teardown_this_cpu(secondary);
+#ifdef CONFIG_CRASH_DUMP
if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) {
/*
* We enter the crash kernel on whatever cpu crashed,
@@ -406,9 +407,11 @@ static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
*/
disable_threadbit = 1;
disable_cpu = cpu_first_thread_sibling(cpu);
- } else if (sibling != crashing_cpu &&
- cpu_thread_in_core(cpu) == 0 &&
- cpu_thread_in_core(sibling) != 0) {
+ } else if (sibling == crashing_cpu) {
+ return;
+ }
+#endif
+ if (cpu_thread_in_core(cpu) == 0 && cpu_thread_in_core(sibling) != 0) {
disable_threadbit = 2;
disable_cpu = sibling;
}
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 0eb689351b7d..5cd407c6a8e4 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
{
- u32 hart, group = 0;
+ u32 hart = 0, group = 0;
- hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
- GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+ if (aia->nr_hart_bits)
+ hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+ GENMASK_ULL(aia->nr_hart_bits - 1, 0);
if (aia->nr_group_bits)
group = (addr >> aia->nr_group_shift) &
GENMASK_ULL(aia->nr_group_bits - 1, 0);
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 994adc26db4b..e5706f5f2c71 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -718,9 +718,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
switch (reg_subtype) {
case KVM_REG_RISCV_ISA_SINGLE:
return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
- case KVM_REG_RISCV_SBI_MULTI_EN:
+ case KVM_REG_RISCV_ISA_MULTI_EN:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
- case KVM_REG_RISCV_SBI_MULTI_DIS:
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
default:
return -ENOENT;
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 968761843203..46b4ad418f6b 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -235,18 +235,19 @@ static void __init setup_bootmem(void)
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
/*
- * memblock allocator is not aware of the fact that last 4K bytes of
- * the addressable memory can not be mapped because of IS_ERR_VALUE
- * macro. Make sure that last 4k bytes are not usable by memblock
- * if end of dram is equal to maximum addressable memory. For 64-bit
- * kernel, this problem can't happen here as the end of the virtual
- * address space is occupied by the kernel mapping then this check must
- * be done as soon as the kernel mapping base address is determined.
+ * Reserve physical address space that would be mapped to virtual
+ * addresses greater than (void *)(-PAGE_SIZE) because:
+ * - This memory would overlap with ERR_PTR
+ * - This memory belongs to high memory, which is not supported
+ *
+ * This is not applicable to 64-bit kernel, because virtual addresses
+ * after (void *)(-PAGE_SIZE) are not linearly mapped: they are
+ * occupied by kernel mapping. Also it is unrealistic for high memory
+ * to exist on 64-bit platforms.
*/
if (!IS_ENABLED(CONFIG_64BIT)) {
- max_mapped_addr = __pa(~(ulong)0);
- if (max_mapped_addr == (phys_ram_end - 1))
- memblock_set_current_limit(max_mapped_addr - 4096);
+ max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
+ memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
}
min_low_pfn = PFN_UP(phys_ram_base);
@@ -668,6 +669,9 @@ void __init create_pgd_mapping(pgd_t *pgdp,
static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
phys_addr_t size)
{
+ if (debug_pagealloc_enabled())
+ return PAGE_SIZE;
+
if (pgtable_l5_enabled &&
!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
return P4D_SIZE;
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 410056a50aa9..271d01a5ba4d 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -387,17 +387,33 @@ int set_direct_map_default_noflush(struct page *page)
}
#ifdef CONFIG_DEBUG_PAGEALLOC
+static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data)
+{
+ int enable = *(int *)data;
+
+ unsigned long val = pte_val(ptep_get(pte));
+
+ if (enable)
+ val |= _PAGE_PRESENT;
+ else
+ val &= ~_PAGE_PRESENT;
+
+ set_pte(pte, __pte(val));
+
+ return 0;
+}
+
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
if (!debug_pagealloc_enabled())
return;
- if (enable)
- __set_memory((unsigned long)page_address(page), numpages,
- __pgprot(_PAGE_PRESENT), __pgprot(0));
- else
- __set_memory((unsigned long)page_address(page), numpages,
- __pgprot(0), __pgprot(_PAGE_PRESENT));
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long size = PAGE_SIZE * numpages;
+
+ apply_to_existing_page_range(&init_mm, start, size, debug_pagealloc_set_page, &enable);
+
+ flush_tlb_kernel_range(start, start + size);
}
#endif
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e9522c6893be..8da3466775de 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -116,9 +116,9 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
-vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
-$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
+$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index c4ea5258ab55..9049f390d834 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -119,8 +119,8 @@ static void init_heap(void)
char *stack_end;
if (boot_params.hdr.loadflags & CAN_USE_HEAP) {
- asm("leal %P1(%%esp),%0"
- : "=r" (stack_end) : "i" (-STACK_SIZE));
+ asm("leal %n1(%%esp),%0"
+ : "=r" (stack_end) : "i" (STACK_SIZE));
heap_end = (char *)
((size_t)boot_params.hdr.heap_end_ptr + 0x200);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 67b68d0d17d1..0cb2396de066 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -294,10 +294,10 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, if CPU has feature1, newinstr1 is used.
* Otherwise, oldinstr is used.
*/
-#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \
- ft_flags2, input...) \
- asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \
- newinstr2, ft_flags2) \
+#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \
+ ft_flags2, input...) \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \
+ newinstr2, ft_flags2) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
@@ -307,7 +307,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
/* Like alternative_io, but for replacing a direct call with another one. */
#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \
- asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \
+ asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \
: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
/*
@@ -316,12 +316,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, if CPU has feature1, function1 is used.
* Otherwise, old function is used.
*/
-#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
- output, input...) \
- asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\
- "call %P[new2]", ft_flags2) \
- : output, ASM_CALL_CONSTRAINT \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
+ output, input...) \
+ asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \
+ "call %c[new2]", ft_flags2) \
+ : output, ASM_CALL_CONSTRAINT \
+ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
[new2] "i" (newfunc2), ## input)
/*
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 3486d91b8595..d510405e4e1d 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -24,7 +24,7 @@ typedef struct {
#ifdef CONFIG_X86_CMPXCHG64
#define __alternative_atomic64(f, g, out, in...) \
- asm volatile("call %P[func]" \
+ asm volatile("call %c[func]" \
: out : [func] "i" (atomic64_##g##_cx8), ## in)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 686e92d2663e..3508f3fc928d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
static __always_inline bool _static_cpu_has(u16 bit)
{
asm goto(
- ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+ ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
" testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n"
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 798183867d78..b71ad173f877 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -100,7 +100,7 @@
}
#define ASM_CALL_ARG0 \
- "call %P[__func] \n" \
+ "call %c[__func] \n" \
ASM_REACHABLE
#define ASM_CALL_ARG1 \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 237dc8cdd12b..3a7755c1a441 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -78,10 +78,10 @@ extern int __get_user_bad(void);
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
- asm volatile("call __" #fn "_%P4" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
- : "0" (ptr), "i" (sizeof(*(ptr)))); \
+ : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \
instrument_get_user(__val_gu); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
@@ -177,7 +177,7 @@ extern void __put_user_nocheck_8(void);
__chk_user_ptr(__ptr); \
__ptr_pu = __ptr; \
__val_pu = __x; \
- asm volatile("call __" #fn "_%P[size]" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=c" (__ret_pu), \
ASM_CALL_CONSTRAINT \
: "0" (__ptr_pu), \
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3cf156f70859..027a8c7a2c9e 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -215,7 +215,14 @@ out:
int amd_smn_read(u16 node, u32 address, u32 *value)
{
- return __amd_smn_rw(node, address, value, false);
+ int err = __amd_smn_rw(node, address, value, false);
+
+ if (PCI_POSSIBLE_ERROR(*value)) {
+ err = -ENODEV;
+ *value = 0;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(amd_smn_read);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ae987a26f26e..198200782827 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1053,18 +1053,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
void get_cpu_address_sizes(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
- bool vp_bits_from_cpuid = true;
if (!cpu_has(c, X86_FEATURE_CPUID) ||
- (c->extended_cpuid_level < 0x80000008))
- vp_bits_from_cpuid = false;
-
- if (vp_bits_from_cpuid) {
- cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
-
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- } else {
+ (c->extended_cpuid_level < 0x80000008)) {
if (IS_ENABLED(CONFIG_X86_64)) {
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
@@ -1078,7 +1069,17 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
cpu_has(c, X86_FEATURE_PSE36))
c->x86_phys_bits = 36;
}
+ } else {
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
+
+ /* Provide a sane default if not enumerated: */
+ if (!c->x86_clflush_size)
+ c->x86_clflush_size = 32;
}
+
c->x86_cache_bits = c->x86_phys_bits;
c->x86_cache_alignment = c->x86_clflush_size;
}
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b180d8e497c3..cc0f7f70b17b 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -295,8 +295,15 @@ void machine_kexec_cleanup(struct kimage *image)
void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
- void *control_page;
+ unsigned int host_mem_enc_active;
int save_ftrace_enabled;
+ void *control_page;
+
+ /*
+ * This must be done before load_segments() since if call depth tracking
+ * is used then GS must be valid to make any function calls.
+ */
+ host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
@@ -358,7 +365,7 @@ void machine_kexec(struct kimage *image)
(unsigned long)page_list,
image->start,
image->preserve_context,
- cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT));
+ host_mem_enc_active);
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 759581bb2128..4471b4e08d23 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -666,6 +666,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
return ret;
vcpu->arch.guest_state_protected = true;
+
+ /*
+ * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
+ * only after setting guest_state_protected because KVM_SET_MSRS allows
+ * dynamic toggling of LBRV (for performance reason) on write access to
+ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+ */
+ svm_enable_lbrv(vcpu);
return 0;
}
@@ -2269,6 +2277,12 @@ void __init sev_hardware_setup(void)
if (!boot_cpu_has(X86_FEATURE_SEV_ES))
goto out;
+ if (!lbrv) {
+ WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
+ "LBRV must be present for SEV-ES support");
+ goto out;
+ }
+
/* Has the system been allocated ASIDs for SEV-ES? */
if (min_sev_asid == 1)
goto out;
@@ -3034,7 +3048,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
/*
* An SEV-ES guest requires a VMSA area that is a separate from the
@@ -3086,10 +3099,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
/* Clear intercepts on selected MSRs */
set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}
void sev_init_vmcb(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 308416b50b03..4650153afa46 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -99,6 +99,7 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
{ .index = MSR_IA32_PRED_CMD, .always = false },
{ .index = MSR_IA32_FLUSH_CMD, .always = false },
+ { .index = MSR_IA32_DEBUGCTLMSR, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -215,7 +216,7 @@ int vgif = true;
module_param(vgif, int, 0444);
/* enable/disable LBR virtualization */
-static int lbrv = true;
+int lbrv = true;
module_param(lbrv, int, 0444);
static int tsc_scaling = true;
@@ -990,7 +991,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
vmcb_mark_dirty(to_vmcb, VMCB_LBR);
}
-static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1000,6 +1001,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+ if (sev_es_guest(vcpu->kvm))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
+
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
if (is_guest_mode(vcpu))
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
@@ -1009,6 +1013,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
@@ -5260,6 +5266,12 @@ static __init int svm_hardware_setup(void)
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+ if (lbrv) {
+ if (!boot_cpu_has(X86_FEATURE_LBRV))
+ lbrv = false;
+ else
+ pr_info("LBR virtualization supported\n");
+ }
/*
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
@@ -5313,14 +5325,6 @@ static __init int svm_hardware_setup(void)
svm_x86_ops.set_vnmi_pending = NULL;
}
-
- if (lbrv) {
- if (!boot_cpu_has(X86_FEATURE_LBRV))
- lbrv = false;
- else
- pr_info("LBR virtualization supported\n");
- }
-
if (!enable_pmu)
pr_info("PMU virtualization is disabled\n");
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 33878efdebc8..2ed3015e03f1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 47
+#define MAX_DIRECT_ACCESS_MSRS 48
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -39,6 +39,7 @@ extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
extern bool vnmi;
+extern int lbrv;
/*
* Clean bits in VMCB.
@@ -543,6 +544,7 @@ u32 *svm_vcpu_alloc_msrpm(void);
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
void svm_vcpu_free_msrpm(u32 *msrpm);
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 10d5ed8b5990..a1cb3a4e6742 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -44,7 +44,11 @@
or %rdx, %rax
.else
cmp $TASK_SIZE_MAX-\size+1, %eax
+.if \size != 8
jae .Lbad_get_user
+.else
+ jae .Lbad_get_user_8
+.endif
sbb %edx, %edx /* array_index_mask_nospec() */
and %edx, %eax
.endif
@@ -154,7 +158,7 @@ SYM_CODE_END(__get_user_handle_exception)
#ifdef CONFIG_X86_32
SYM_CODE_START_LOCAL(__get_user_8_handle_exception)
ASM_CLAC
-bad_get_user_8:
+.Lbad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ce84ba86e69e..6ce10e3c6228 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -493,7 +493,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
for_each_reserved_mem_region(mb_region) {
int nid = memblock_get_region_node(mb_region);
- if (nid != MAX_NUMNODES)
+ if (nid != NUMA_NO_NODE)
node_set(nid, reserved_nodemask);
}
@@ -614,9 +614,9 @@ static int __init numa_init(int (*init_func)(void))
nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
- MAX_NUMNODES));
+ NUMA_NO_NODE));
WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
- MAX_NUMNODES));
+ NUMA_NO_NODE));
/* In case that parsing SRAT failed. */
WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
numa_reset_distance();