summaryrefslogtreecommitdiff
path: root/arch/riscv/include
diff options
context:
space:
mode:
authorPalmer Dabbelt <palmer@rivosinc.com>2024-04-29 20:49:39 +0300
committerPalmer Dabbelt <palmer@rivosinc.com>2024-04-30 20:35:48 +0300
commit4f16345d92006110a9e686ee0d68a5d64a10fe83 (patch)
treeff067eb46739e7cba218b788887e66d7981a3fd0 /arch/riscv/include
parent48b4fc66939d5ed49da305cad9788dcd953b5cd2 (diff)
parentdaef19263fc102551d734f39d9ad417098ffb360 (diff)
downloadlinux-4f16345d92006110a9e686ee0d68a5d64a10fe83.tar.xz
Merge patch series "riscv: ASID-related and UP-related TLB flush enhancements"
Samuel Holland <samuel.holland@sifive.com> says: This series converts uniprocessor kernel builds to use the same TLB flushing code as SMP builds, to take advantage of batching and existing range- and ASID-based TLB flush optimizations. It optimizes out IPIs and SBI calls based on the online CPU count, which also covers the scenario where SMP was enabled at build time but only one CPU is present/online. A final optimization is to use single-ASID flushes wherever possible, to avoid unnecessary TLB misses for kernel mappings. This series has a semantic conflict with the AIA patches that are in linux-next due to the removal of the third parameter of riscv_ipi_set_virq_range(), which is called from imsic_ipi_domain_init() in drivers/irqchip/irq-riscv-imsic-early.c. The resolution is to remove the extra argument from the call site. Here are some numbers from D1 which show the performance impact: v6.9-rc1: System Benchmarks Partial Index BASELINE RESULT INDEX Execl Throughput 43.0 198.5 46.2 File Copy 1024 bufsize 2000 maxblocks 3960.0 73934.4 186.7 File Copy 256 bufsize 500 maxblocks 1655.0 20242.6 122.3 File Copy 4096 bufsize 8000 maxblocks 5800.0 197706.4 340.9 Pipe Throughput 12440.0 176974.2 142.3 Pipe-based Context Switching 4000.0 23626.8 59.1 Process Creation 126.0 449.9 35.7 Shell Scripts (1 concurrent) 42.4 544.4 128.4 Shell Scripts (16 concurrent) --- 35.3 --- Shell Scripts (8 concurrent) 6.0 71.6 119.3 System Call Overhead 15000.0 248072.6 165.4 ======== System Benchmarks Index Score (Partial Only) 110.6 v6.9-rc1 + this patch series: System Benchmarks Partial Index BASELINE RESULT INDEX Execl Throughput 43.0 196.8 45.8 File Copy 1024 bufsize 2000 maxblocks 3960.0 71782.2 181.3 File Copy 256 bufsize 500 maxblocks 1655.0 21269.4 128.5 File Copy 4096 bufsize 8000 maxblocks 5800.0 199424.0 343.8 Pipe Throughput 12440.0 196468.6 157.9 Pipe-based Context Switching 4000.0 24261.8 60.7 Process Creation 126.0 459.0 36.4 Shell Scripts (1 concurrent) 42.4 543.8 128.2 Shell Scripts (16 concurrent) --- 35.5 --- Shell Scripts (8 concurrent) 6.0 71.7 119.6 System Call Overhead 15000.0 259415.2 172.9 ======== System Benchmarks Index Score (Partial Only) 113.0 * b4-shazam-lts: riscv: mm: Always use an ASID to flush mm contexts riscv: mm: Preserve global TLB entries when switching contexts riscv: mm: Make asid_bits a local variable riscv: mm: Use a fixed layout for the MM context ID riscv: mm: Introduce cntx2asid/cntx2version helper macros riscv: Avoid TLB flush loops when affected by SiFive CIP-1200 riscv: Apply SiFive CIP-1200 workaround to single-ASID sfence.vma riscv: mm: Combine the SMP and UP TLB flush code riscv: Only send remote fences when some other CPU is online riscv: mm: Broadcast kernel TLB flushes only when needed riscv: Use IPIs for remote cache/TLB flushes by default riscv: Factor out page table TLB synchronization riscv: Flush the instruction cache during SMP bringup Link: https://lore.kernel.org/r/20240327045035.368512-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv/include')
-rw-r--r--arch/riscv/include/asm/errata_list.h12
-rw-r--r--arch/riscv/include/asm/mmu.h3
-rw-r--r--arch/riscv/include/asm/pgalloc.h32
-rw-r--r--arch/riscv/include/asm/sbi.h4
-rw-r--r--arch/riscv/include/asm/smp.h15
-rw-r--r--arch/riscv/include/asm/tlbflush.h52
6 files changed, 56 insertions, 62 deletions
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 1f2dbfb8a8bf..35ce26899960 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -43,11 +43,21 @@ ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \
CONFIG_ERRATA_SIFIVE_CIP_453)
#else /* !__ASSEMBLY__ */
-#define ALT_FLUSH_TLB_PAGE(x) \
+#define ALT_SFENCE_VMA_ASID(asid) \
+asm(ALTERNATIVE("sfence.vma x0, %0", "sfence.vma", SIFIVE_VENDOR_ID, \
+ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
+ : : "r" (asid) : "memory")
+
+#define ALT_SFENCE_VMA_ADDR(addr) \
asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
: : "r" (addr) : "memory")
+#define ALT_SFENCE_VMA_ADDR_ASID(addr, asid) \
+asm(ALTERNATIVE("sfence.vma %0, %1", "sfence.vma", SIFIVE_VENDOR_ID, \
+ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
+ : : "r" (addr), "r" (asid) : "memory")
+
/*
* _val is marked as "will be overwritten", so need to set it to 0
* in the default case.
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 60be458e94da..947fd60f9051 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -28,6 +28,9 @@ typedef struct {
#endif
} mm_context_t;
+#define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK)
+#define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK)
+
void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot);
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index deaf971253a2..f52264304f77 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -8,6 +8,7 @@
#define _ASM_RISCV_PGALLOC_H
#include <linux/mm.h>
+#include <asm/sbi.h>
#include <asm/tlb.h>
#ifdef CONFIG_MMU
@@ -15,6 +16,14 @@
#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
+static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
+{
+ if (riscv_use_sbi_for_rfence())
+ tlb_remove_ptdesc(tlb, pt);
+ else
+ tlb_remove_page_ptdesc(tlb, pt);
+}
+
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
@@ -102,10 +111,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
struct ptdesc *ptdesc = virt_to_ptdesc(pud);
pagetable_pud_dtor(ptdesc);
- if (riscv_use_ipi_for_rfence())
- tlb_remove_page_ptdesc(tlb, ptdesc);
- else
- tlb_remove_ptdesc(tlb, ptdesc);
+ riscv_tlb_remove_ptdesc(tlb, ptdesc);
}
}
@@ -139,12 +145,8 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long addr)
{
- if (pgtable_l5_enabled) {
- if (riscv_use_ipi_for_rfence())
- tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d));
- else
- tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
- }
+ if (pgtable_l5_enabled)
+ riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
}
#endif /* __PAGETABLE_PMD_FOLDED */
@@ -176,10 +178,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
pagetable_pmd_dtor(ptdesc);
- if (riscv_use_ipi_for_rfence())
- tlb_remove_page_ptdesc(tlb, ptdesc);
- else
- tlb_remove_ptdesc(tlb, ptdesc);
+ riscv_tlb_remove_ptdesc(tlb, ptdesc);
}
#endif /* __PAGETABLE_PMD_FOLDED */
@@ -190,10 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
struct ptdesc *ptdesc = page_ptdesc(pte);
pagetable_pte_dtor(ptdesc);
- if (riscv_use_ipi_for_rfence())
- tlb_remove_page_ptdesc(tlb, ptdesc);
- else
- tlb_remove_ptdesc(tlb, ptdesc);
+ riscv_tlb_remove_ptdesc(tlb, ptdesc);
}
#endif /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 6e68f8dff76b..ea84392ca9d7 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -375,8 +375,12 @@ unsigned long riscv_cached_marchid(unsigned int cpu_id);
unsigned long riscv_cached_mimpid(unsigned int cpu_id);
#if IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RISCV_SBI)
+DECLARE_STATIC_KEY_FALSE(riscv_sbi_for_rfence);
+#define riscv_use_sbi_for_rfence() \
+ static_branch_unlikely(&riscv_sbi_for_rfence)
void sbi_ipi_init(void);
#else
+static inline bool riscv_use_sbi_for_rfence(void) { return false; }
static inline void sbi_ipi_init(void) { }
#endif
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index 0d555847cde6..7ac80e9f2288 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -49,12 +49,7 @@ void riscv_ipi_disable(void);
bool riscv_ipi_have_virq_range(void);
/* Set the IPI interrupt numbers for arch (called by irqchip drivers) */
-void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence);
-
-/* Check if we can use IPIs for remote FENCEs */
-DECLARE_STATIC_KEY_FALSE(riscv_ipi_for_rfence);
-#define riscv_use_ipi_for_rfence() \
- static_branch_unlikely(&riscv_ipi_for_rfence)
+void riscv_ipi_set_virq_range(int virq, int nr);
/* Check other CPUs stop or not */
bool smp_crash_stop_failed(void);
@@ -104,16 +99,10 @@ static inline bool riscv_ipi_have_virq_range(void)
return false;
}
-static inline void riscv_ipi_set_virq_range(int virq, int nr,
- bool use_for_rfence)
+static inline void riscv_ipi_set_virq_range(int virq, int nr)
{
}
-static inline bool riscv_use_ipi_for_rfence(void)
-{
- return false;
-}
-
#endif /* CONFIG_SMP */
#if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 4112cc8d1d69..72e559934952 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -15,24 +15,34 @@
#define FLUSH_TLB_NO_ASID ((unsigned long)-1)
#ifdef CONFIG_MMU
-extern unsigned long asid_mask;
-
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
}
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+ if (asid != FLUSH_TLB_NO_ASID)
+ ALT_SFENCE_VMA_ASID(asid);
+ else
+ local_flush_tlb_all();
+}
+
/* Flush one page from local TLB */
static inline void local_flush_tlb_page(unsigned long addr)
{
- ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
+ ALT_SFENCE_VMA_ADDR(addr);
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+ unsigned long asid)
+{
+ if (asid != FLUSH_TLB_NO_ASID)
+ ALT_SFENCE_VMA_ADDR_ASID(addr, asid);
+ else
+ local_flush_tlb_page(addr);
}
-#else /* CONFIG_MMU */
-#define local_flush_tlb_all() do { } while (0)
-#define local_flush_tlb_page(addr) do { } while (0)
-#endif /* CONFIG_MMU */
-#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
void flush_tlb_all(void);
void flush_tlb_mm(struct mm_struct *mm);
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -55,27 +65,9 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
void arch_flush_tlb_batched_pending(struct mm_struct *mm);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
-#else /* CONFIG_SMP && CONFIG_MMU */
-
-#define flush_tlb_all() local_flush_tlb_all()
-#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- local_flush_tlb_all();
-}
-
-/* Flush a range of kernel pages */
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- local_flush_tlb_all();
-}
-
-#define flush_tlb_mm(mm) flush_tlb_all()
-#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
-#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
-#endif /* !CONFIG_SMP || !CONFIG_MMU */
+extern unsigned long tlb_flush_all_threshold;
+#else /* CONFIG_MMU */
+#define local_flush_tlb_all() do { } while (0)
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_TLBFLUSH_H */