From 6b9391b581fddd8579239dad4de4f0393149e10a Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 12 Mar 2024 16:53:41 -0700 Subject: riscv: Include riscv_set_icache_flush_ctx prctl Support new prctl with key PR_RISCV_SET_ICACHE_FLUSH_CTX to enable optimization of cross modifying code. This prctl enables userspace code to use icache flushing instructions such as fence.i with the guarantee that the icache will continue to be clean after thread migration. Signed-off-by: Charlie Jenkins Reviewed-by: Atish Patra Reviewed-by: Alexandre Ghiti Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20240312-fencei-v13-2-4b6bdc2bbf32@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 113 +++++++++++++++++++++++++++++++++++++++++++++ arch/riscv/mm/context.c | 19 ++++---- 2 files changed, 124 insertions(+), 8 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 55a34f2020a8..15a95578e670 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -152,3 +153,115 @@ void __init riscv_init_cbo_blocksizes(void) if (cboz_block_size) riscv_cboz_block_size = cboz_block_size; } + +#ifdef CONFIG_SMP +static void set_icache_stale_mask(void) +{ + cpumask_t *mask; + bool stale_cpu; + + /* + * Mark every other hart's icache as needing a flush for + * this MM. Maintain the previous value of the current + * cpu to handle the case when this function is called + * concurrently on different harts. + */ + mask = ¤t->mm->context.icache_stale_mask; + stale_cpu = cpumask_test_cpu(smp_processor_id(), mask); + + cpumask_setall(mask); + assign_bit(cpumask_check(smp_processor_id()), cpumask_bits(mask), stale_cpu); +} +#endif + +/** + * riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in + * userspace. + * @ctx: Set the type of icache flushing instructions permitted/prohibited in + * userspace. Supported values described below. + * + * Supported values for ctx: + * + * * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in user space. + * + * * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in user space. All threads in + * a process will be affected when ``scope == PR_RISCV_SCOPE_PER_PROCESS``. + * Therefore, caution must be taken; use this flag only when you can guarantee + * that no thread in the process will emit fence.i from this point onward. + * + * @scope: Set scope of where icache flushing instructions are allowed to be + * emitted. Supported values described below. + * + * Supported values for scope: + * + * * %PR_RISCV_SCOPE_PER_PROCESS: Ensure the icache of any thread in this process + * is coherent with instruction storage upon + * migration. + * + * * %PR_RISCV_SCOPE_PER_THREAD: Ensure the icache of the current thread is + * coherent with instruction storage upon + * migration. + * + * When ``scope == PR_RISCV_SCOPE_PER_PROCESS``, all threads in the process are + * permitted to emit icache flushing instructions. Whenever any thread in the + * process is migrated, the corresponding hart's icache will be guaranteed to be + * consistent with instruction storage. This does not enforce any guarantees + * outside of migration. If a thread modifies an instruction that another thread + * may attempt to execute, the other thread must still emit an icache flushing + * instruction before attempting to execute the potentially modified + * instruction. This must be performed by the user-space program. 
+ * + * In per-thread context (eg. ``scope == PR_RISCV_SCOPE_PER_THREAD``) only the + * thread calling this function is permitted to emit icache flushing + * instructions. When the thread is migrated, the corresponding hart's icache + * will be guaranteed to be consistent with instruction storage. + * + * On kernels configured without SMP, this function is a nop as migrations + * across harts will not occur. + */ +int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope) +{ +#ifdef CONFIG_SMP + switch (ctx) { + case PR_RISCV_CTX_SW_FENCEI_ON: + switch (scope) { + case PR_RISCV_SCOPE_PER_PROCESS: + current->mm->context.force_icache_flush = true; + break; + case PR_RISCV_SCOPE_PER_THREAD: + current->thread.force_icache_flush = true; + break; + default: + return -EINVAL; + } + break; + case PR_RISCV_CTX_SW_FENCEI_OFF: + switch (scope) { + case PR_RISCV_SCOPE_PER_PROCESS: + current->mm->context.force_icache_flush = false; + + set_icache_stale_mask(); + break; + case PR_RISCV_SCOPE_PER_THREAD: + current->thread.force_icache_flush = false; + + set_icache_stale_mask(); + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + return 0; +#else + switch (ctx) { + case PR_RISCV_CTX_SW_FENCEI_ON: + case PR_RISCV_CTX_SW_FENCEI_OFF: + return 0; + default: + return -EINVAL; + } +#endif +} diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 217fd4de6134..beef30f42d5c 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_MMU @@ -297,21 +298,23 @@ static inline void set_mm(struct mm_struct *prev, * * The "cpu" argument must be the current local CPU number. */ -static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu) +static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu, + struct task_struct *task) { #ifdef CONFIG_SMP - cpumask_t *mask = &mm->context.icache_stale_mask; - - if (cpumask_test_cpu(cpu, mask)) { - cpumask_clear_cpu(cpu, mask); + if (cpumask_test_and_clear_cpu(cpu, &mm->context.icache_stale_mask)) { /* * Ensure the remote hart's writes are visible to this hart. * This pairs with a barrier in flush_icache_mm. */ smp_mb(); - local_flush_icache_all(); - } + /* + * If cache will be flushed in switch_to, no need to flush here. + */ + if (!(task && switch_to_should_flush_icache(task))) + local_flush_icache_all(); + } #endif } @@ -332,5 +335,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, set_mm(prev, next, cpu); - flush_icache_deferred(next, cpu); + flush_icache_deferred(next, cpu, task); } -- cgit v1.2.3 From decde1fa209323c77a7498e803350c5f3e992d62 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 12 Mar 2024 16:53:43 -0700 Subject: cpumask: Add assign cpu Standardize an assign_cpu function for cpumasks. 
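Stepping back to the PR_RISCV_SET_ICACHE_FLUSH_CTX interface introduced by the previous patch, a minimal userspace sketch of the intended usage might look as follows. This is illustrative only: it assumes the PR_RISCV_* constants are exported through linux/prctl.h as that series intends, and it omits error handling and the cross-thread synchronization a real JIT would still need.

#include <stdint.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

/* Opt in once, before any thread starts modifying code. */
static void jit_init(void)
{
        /* Every thread in this process may now rely on fence.i staying
         * effective across migration to another hart. */
        prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON,
              PR_RISCV_SCOPE_PER_PROCESS);
}

static void jit_patch(uint32_t *slot, uint32_t new_insn)
{
        *slot = new_insn;
        /* The prctl only covers migration; ordering against other
         * executing threads remains userspace's responsibility, so the
         * writer (and any other executor) still issues fence.i. */
        asm volatile("fence.i" ::: "memory");
}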
Signed-off-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240312-fencei-v13-4-4b6bdc2bbf32@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 2 +- include/linux/cpumask.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 15a95578e670..9dad35f0ce8b 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -170,7 +170,7 @@ static void set_icache_stale_mask(void) stale_cpu = cpumask_test_cpu(smp_processor_id(), mask); cpumask_setall(mask); - assign_bit(cpumask_check(smp_processor_id()), cpumask_bits(mask), stale_cpu); + cpumask_assign_cpu(smp_processor_id(), mask, stale_cpu); } #endif diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cfb545841a2c..1b85e09c4ba5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -492,6 +492,22 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) __clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); } +/** + * cpumask_assign_cpu - assign a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + * @value: the value to assign + */ +static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) +{ + assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); +} + +static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) +{ + __assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); +} + /** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) -- cgit v1.2.3 From dc892fb443224da7018891a5fac9cb6ac50c14b3 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:44 -0700 Subject: riscv: Use IPIs for remote cache/TLB flushes by default An IPI backend is always required in an SMP configuration, but an SBI implementation is not. For example, SBI will be unavailable when the kernel runs in M mode. For this reason, consider IPI delivery of cache and TLB flushes to be the base case, and any other implementation (such as the SBI remote fence extension) to be an optimization. Generally, if IPIs can be delivered without firmware assistance, they are assumed to be faster than SBI calls due to the SBI context switch overhead. However, when SBI is used as the IPI backend, then the context switch cost must be paid anyway, and performing the cache/TLB flush directly in the SBI implementation is more efficient than injecting an interrupt to S-mode. This is the only existing scenario where riscv_ipi_set_virq_range() is called with use_for_rfence set to false. sbi_ipi_init() already checks riscv_ipi_have_virq_range(), so it only calls riscv_ipi_set_virq_range() when no other IPI device is available. This allows moving the static key and dropping the use_for_rfence parameter. This decouples the static key from the irqchip driver probe order. Furthermore, the static branch only makes sense when CONFIG_RISCV_SBI is enabled. Otherwise, IPIs must be used. Add a fallback definition of riscv_use_sbi_for_rfence() which handles this case and removes the need to check CONFIG_RISCV_SBI elsewhere, such as in cacheflush.c.
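The resulting call pattern at every remote-fence site, sketched here for illustration (it mirrors the cacheflush.c hunk below), no longer needs an explicit CONFIG_RISCV_SBI check:

/* With CONFIG_RISCV_SBI=n the stub riscv_use_sbi_for_rfence() simply
 * returns false, so the compiler can drop the SBI branch entirely;
 * with SBI built in it is a static branch flipped by sbi_ipi_init(). */
if (riscv_use_sbi_for_rfence())
        sbi_remote_fence_i(NULL);                  /* one SBI call covers all harts */
else
        on_each_cpu(ipi_remote_fence_i, NULL, 1);  /* IPI each online CPU */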
Reviewed-by: Anup Patel Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-4-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgalloc.h | 7 ++++--- arch/riscv/include/asm/sbi.h | 4 ++++ arch/riscv/include/asm/smp.h | 15 ++------------- arch/riscv/kernel/sbi-ipi.c | 11 ++++++++++- arch/riscv/kernel/smp.c | 11 +---------- arch/riscv/mm/cacheflush.c | 5 ++--- arch/riscv/mm/tlbflush.c | 31 ++++++++++++++----------------- drivers/clocksource/timer-clint.c | 2 +- 8 files changed, 38 insertions(+), 48 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index b34587da8882..f52264304f77 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -8,6 +8,7 @@ #define _ASM_RISCV_PGALLOC_H #include +#include #include #ifdef CONFIG_MMU @@ -17,10 +18,10 @@ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, pt); - else + if (riscv_use_sbi_for_rfence()) tlb_remove_ptdesc(tlb, pt); + else + tlb_remove_page_ptdesc(tlb, pt); } static inline void pmd_populate_kernel(struct mm_struct *mm, diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6e68f8dff76b..ea84392ca9d7 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -375,8 +375,12 @@ unsigned long riscv_cached_marchid(unsigned int cpu_id); unsigned long riscv_cached_mimpid(unsigned int cpu_id); #if IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RISCV_SBI) +DECLARE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +#define riscv_use_sbi_for_rfence() \ + static_branch_unlikely(&riscv_sbi_for_rfence) void sbi_ipi_init(void); #else +static inline bool riscv_use_sbi_for_rfence(void) { return false; } static inline void sbi_ipi_init(void) { } #endif diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 0d555847cde6..7ac80e9f2288 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -49,12 +49,7 @@ void riscv_ipi_disable(void); bool riscv_ipi_have_virq_range(void); /* Set the IPI interrupt numbers for arch (called by irqchip drivers) */ -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence); - -/* Check if we can use IPIs for remote FENCEs */ -DECLARE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -#define riscv_use_ipi_for_rfence() \ - static_branch_unlikely(&riscv_ipi_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr); /* Check other CPUs stop or not */ bool smp_crash_stop_failed(void); @@ -104,16 +99,10 @@ static inline bool riscv_ipi_have_virq_range(void) return false; } -static inline void riscv_ipi_set_virq_range(int virq, int nr, - bool use_for_rfence) +static inline void riscv_ipi_set_virq_range(int virq, int nr) { } -static inline bool riscv_use_ipi_for_rfence(void) -{ - return false; -} - #endif /* CONFIG_SMP */ #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index a4559695ce62..1026e22955cc 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -13,6 +13,9 @@ #include #include +DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence); + static int sbi_ipi_virq; static void sbi_ipi_handle(struct irq_desc *desc) @@ -72,6 +75,12 @@ void __init sbi_ipi_init(void) "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); - 
riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false); + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE); pr_info("providing IPIs using SBI IPI extension\n"); + + /* + * Use the SBI remote fence extension to avoid + * the extra context switch needed to handle IPIs. + */ + static_branch_enable(&riscv_sbi_for_rfence); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 45dd4035416e..8e6eb64459af 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -171,10 +171,7 @@ bool riscv_ipi_have_virq_range(void) return (ipi_virq_base) ? true : false; } -DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence); - -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr) { int i, err; @@ -197,12 +194,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) /* Enabled IPIs for boot CPU immediately */ riscv_ipi_enable(); - - /* Update RFENCE static key */ - if (use_for_rfence) - static_branch_enable(&riscv_ipi_for_rfence); - else - static_branch_disable(&riscv_ipi_for_rfence); } static const char * const ipi_names[] = { diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index bc61ee5975e4..d76fc73e594b 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -21,7 +21,7 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence()) + if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); @@ -69,8 +69,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local) * with flush_icache_deferred(). */ smp_mb(); - } else if (IS_ENABLED(CONFIG_RISCV_SBI) && - !riscv_use_ipi_for_rfence()) { + } else if (riscv_use_sbi_for_rfence()) { sbi_remote_fence_i(&others); } else { on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 893566e004b7..0435605b07d0 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -79,10 +79,10 @@ static void __ipi_flush_tlb_all(void *info) void flush_tlb_all(void) { - if (riscv_use_ipi_for_rfence()) - on_each_cpu(__ipi_flush_tlb_all, NULL, 1); - else + if (riscv_use_sbi_for_rfence()) sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); + else + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); } struct flush_tlb_range_data { @@ -103,7 +103,6 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { - struct flush_tlb_range_data ftd; bool broadcast; if (cpumask_empty(cmask)) @@ -119,20 +118,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, broadcast = true; } - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { + if (!broadcast) { local_flush_tlb_range_asid(start, size, stride, asid); + } else if (riscv_use_sbi_for_rfence()) { + sbi_remote_sfence_vma_asid(cmask, start, size, asid); + } else { + struct flush_tlb_range_data ftd; + + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1); } if (cmask != cpu_online_mask) diff --git a/drivers/clocksource/timer-clint.c 
b/drivers/clocksource/timer-clint.c index 09fd292eb83d..0bdd9d7ec545 100644 --- a/drivers/clocksource/timer-clint.c +++ b/drivers/clocksource/timer-clint.c @@ -251,7 +251,7 @@ static int __init clint_timer_init_dt(struct device_node *np) } irq_set_chained_handler(clint_ipi_irq, clint_ipi_interrupt); - riscv_ipi_set_virq_range(rc, BITS_PER_BYTE, true); + riscv_ipi_set_virq_range(rc, BITS_PER_BYTE); clint_clear_ipi(); #endif -- cgit v1.2.3 From 038ac18aae935c89c874649acde5a82f588a12b4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:45 -0700 Subject: riscv: mm: Broadcast kernel TLB flushes only when needed __flush_tlb_range() avoids broadcasting TLB flushes when an mm context is only active on the local CPU. Apply this same optimization to TLB flushes of kernel memory when only one CPU is online. This check can be constant-folded when SMP is disabled. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 0435605b07d0..da821315d43e 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -103,22 +103,15 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { - bool broadcast; + unsigned int cpu; if (cpumask_empty(cmask)) return; - if (cmask != cpu_online_mask) { - unsigned int cpuid; + cpu = get_cpu(); - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - } else { - broadcast = true; - } - - if (!broadcast) { + /* Check if the TLB flush needs to be sent to other CPUs. */ + if (cpumask_any_but(cmask, cpu) >= nr_cpu_ids) { local_flush_tlb_range_asid(start, size, stride, asid); } else if (riscv_use_sbi_for_rfence()) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); @@ -132,8 +125,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1); } - if (cmask != cpu_online_mask) - put_cpu(); + put_cpu(); } static inline unsigned long get_mm_asid(struct mm_struct *mm) -- cgit v1.2.3 From 9546f00410ed88b8117388cfd74ea59f4616a158 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:46 -0700 Subject: riscv: Only send remote fences when some other CPU is online If no other CPU is online, a local cache or TLB flush is sufficient. These checks can be constant-folded when SMP is disabled. 
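The constant-folding claim follows from the uniprocessor definitions in the generic headers; a sketch of the reasoning (an assumption about the generic num_online_cpus() definition, not part of the patch):

/* With CONFIG_SMP=n, num_online_cpus() evaluates to the constant 1U,
 * so the new guard reduces to "if (1U < 2)" and the remote-fence code
 * after it becomes dead code the compiler can discard. */
if (num_online_cpus() < 2)
        return;  /* e.g. flush_icache_all(): only the local fence.i remains */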
Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-6-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 4 +++- arch/riscv/mm/tlbflush.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index d76fc73e594b..f5be1fec8191 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -21,7 +21,9 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (riscv_use_sbi_for_rfence()) + if (num_online_cpus() < 2) + return; + else if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index da821315d43e..0901aa47b58f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -79,7 +79,9 @@ static void __ipi_flush_tlb_all(void *info) void flush_tlb_all(void) { - if (riscv_use_sbi_for_rfence()) + if (num_online_cpus() < 2) + local_flush_tlb_all(); + else if (riscv_use_sbi_for_rfence()) sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); else on_each_cpu(__ipi_flush_tlb_all, NULL, 1); -- cgit v1.2.3 From c6026d35b6abb5bd954788478bfa800a942e2033 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:47 -0700 Subject: riscv: mm: Combine the SMP and UP TLB flush code In SMP configurations, all TLB flushing narrower than flush_tlb_all() goes through __flush_tlb_range(). Do the same in UP configurations. This allows UP configurations to take advantage of recent improvements to the code in tlbflush.c, such as support for huge pages and flushing multiple-page ranges. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Reviewed-by: Yunhui Cui Link: https://lore.kernel.org/r/20240327045035.368512-7-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/include/asm/tlbflush.h | 31 +++---------------------------- arch/riscv/mm/Makefile | 5 +---- 3 files changed, 5 insertions(+), 33 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index be09c8836d56..442532819a44 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -60,7 +60,7 @@ config RISCV select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 4112cc8d1d69..4f86424b1ba5 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -27,12 +27,7 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } -#else /* CONFIG_MMU */ -#define local_flush_tlb_all() do { } while (0) -#define local_flush_tlb_page(addr) do { } while (0) -#endif /* CONFIG_MMU */ -#if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, @@ -54,28 +49,8 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, unsigned long uaddr); void arch_flush_tlb_batched_pending(struct 
mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); - -#else /* CONFIG_SMP && CONFIG_MMU */ - -#define flush_tlb_all() local_flush_tlb_all() -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - local_flush_tlb_all(); -} - -/* Flush a range of kernel pages */ -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - local_flush_tlb_all(); -} - -#define flush_tlb_mm(mm) flush_tlb_all() -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() -#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ +#else /* CONFIG_MMU */ +#define local_flush_tlb_all() do { } while (0) +#endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 2c869f8026a8..cbe4d775ef56 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,14 +13,11 @@ endif KCOV_INSTRUMENT_init.o := n obj-y += init.o -obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o +obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o tlbflush.o obj-y += cacheflush.o obj-y += context.o obj-y += pmem.o -ifeq ($(CONFIG_MMU),y) -obj-$(CONFIG_SMP) += tlbflush.o -endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_KASAN) += kasan_init.o -- cgit v1.2.3 From 20e03d702e00a3e0269a1d6f9549c2e370492054 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:48 -0700 Subject: riscv: Apply SiFive CIP-1200 workaround to single-ASID sfence.vma commit 3f1e782998cd ("riscv: add ASID-based tlbflushing methods") added calls to the sfence.vma instruction with rs2 != x0. These single-ASID instruction variants are also affected by SiFive errata CIP-1200. Until now, the errata workaround was not needed for the single-ASID sfence.vma variants, because they were only used when the ASID allocator was enabled, and the affected SiFive platforms do not support multiple ASIDs. However, we are going to start using those sfence.vma variants regardless of ASID support, so now we need alternatives covering them. 
Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-8-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/errata_list.h | 12 +++++++++++- arch/riscv/include/asm/tlbflush.h | 19 ++++++++++++++++++- arch/riscv/mm/tlbflush.c | 23 ----------------------- 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 1f2dbfb8a8bf..35ce26899960 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -43,11 +43,21 @@ ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \ CONFIG_ERRATA_SIFIVE_CIP_453) #else /* !__ASSEMBLY__ */ -#define ALT_FLUSH_TLB_PAGE(x) \ +#define ALT_SFENCE_VMA_ASID(asid) \ +asm(ALTERNATIVE("sfence.vma x0, %0", "sfence.vma", SIFIVE_VENDOR_ID, \ + ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ + : : "r" (asid) : "memory") + +#define ALT_SFENCE_VMA_ADDR(addr) \ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ : : "r" (addr) : "memory") +#define ALT_SFENCE_VMA_ADDR_ASID(addr, asid) \ +asm(ALTERNATIVE("sfence.vma %0, %1", "sfence.vma", SIFIVE_VENDOR_ID, \ + ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ + : : "r" (addr), "r" (asid) : "memory") + /* * _val is marked as "will be overwritten", so need to set it to 0 * in the default case. diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 4f86424b1ba5..463b615d7728 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,10 +22,27 @@ static inline void local_flush_tlb_all(void) __asm__ __volatile__ ("sfence.vma" : : : "memory"); } +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + ALT_SFENCE_VMA_ASID(asid); + else + local_flush_tlb_all(); +} + /* Flush one page from local TLB */ static inline void local_flush_tlb_page(unsigned long addr) { - ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); + ALT_SFENCE_VMA_ADDR(addr); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + ALT_SFENCE_VMA_ADDR_ASID(addr, asid); + else + local_flush_tlb_page(addr); } void flush_tlb_all(void); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 0901aa47b58f..ad7bdcfcc219 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -7,29 +7,6 @@ #include #include -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - if (asid != FLUSH_TLB_NO_ASID) - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); - else - local_flush_tlb_all(); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - if (asid != FLUSH_TLB_NO_ASID) - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); - else - local_flush_tlb_page(addr); -} - /* * Flush entire TLB if number of entries to be flushed is greater * than the threshold below. -- cgit v1.2.3 From d6dcdabafcd7c612b164079d00da6d9775863a0b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:49 -0700 Subject: riscv: Avoid TLB flush loops when affected by SiFive CIP-1200 Implementations affected by SiFive errata CIP-1200 have a bug which forces the kernel to always use the global variant of the sfence.vma instruction. 
When affected by this errata, do not attempt to flush a range of addresses; each iteration of the loop would actually flush the whole TLB instead. Instead, minimize the overall number of sfence.vma instructions. Signed-off-by: Samuel Holland Reviewed-by: Yunhui Cui Link: https://lore.kernel.org/r/20240327045035.368512-9-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/sifive/errata.c | 5 +++++ arch/riscv/include/asm/tlbflush.h | 2 ++ arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 3d9a32d791f7..716cfedad3a2 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -42,6 +42,11 @@ static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long imp return false; if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626) return false; + +#ifdef CONFIG_MMU + tlb_flush_all_threshold = 0; +#endif + return true; } diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 463b615d7728..8e329721375b 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -66,6 +66,8 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, unsigned long uaddr); void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); + +extern unsigned long tlb_flush_all_threshold; #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #endif /* CONFIG_MMU */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index ad7bdcfcc219..18af7b5053af 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -11,7 +11,7 @@ * Flush entire TLB if number of entries to be flushed is greater * than the threshold below. */ -static unsigned long tlb_flush_all_threshold __read_mostly = 64; +unsigned long tlb_flush_all_threshold __read_mostly = 64; static void local_flush_tlb_range_threshold_asid(unsigned long start, unsigned long size, -- cgit v1.2.3 From 74cd17792d28162fe692d5a25fe5cc081203ad19 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:50 -0700 Subject: riscv: mm: Introduce cntx2asid/cntx2version helper macros When using the ASID allocator, the MM context ID contains two values: the ASID in the lower bits, and the allocator version number in the remaining bits. Use macros to make this separation more obvious. 
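A concrete decomposition, with made-up numbers for illustration (the real asid_mask depends on how many ASID bits the hardware implements):

/* Assume 16 hardware ASID bits, i.e. asid_mask == 0xffff. For a
 * context ID of 0x30007:
 *   cntx2asid(0x30007)    == 0x0007   (ASID programmed into satp)
 *   cntx2version(0x30007) == 0x30000  (allocator generation number)
 */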
Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-10-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 3 +++ arch/riscv/mm/context.c | 12 ++++++------ arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 355504b37f8e..a550fbf770be 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,6 +26,9 @@ typedef struct { #endif } mm_context_t; +#define cntx2asid(cntx) ((cntx) & asid_mask) +#define cntx2version(cntx) ((cntx) & ~asid_mask) + void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot); #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index ba8eb3944687..b562b3c44487 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -81,7 +81,7 @@ static void __flush_context(void) if (cntx == 0) cntx = per_cpu(reserved_context, i); - __set_bit(cntx & asid_mask, context_asid_map); + __set_bit(cntx2asid(cntx), context_asid_map); per_cpu(reserved_context, i) = cntx; } @@ -102,7 +102,7 @@ static unsigned long __new_context(struct mm_struct *mm) lockdep_assert_held(&context_lock); if (cntx != 0) { - unsigned long newcntx = ver | (cntx & asid_mask); + unsigned long newcntx = ver | cntx2asid(cntx); /* * If our current CONTEXT was active during a rollover, we @@ -115,7 +115,7 @@ static unsigned long __new_context(struct mm_struct *mm) * We had a valid CONTEXT in a previous life, so try to * re-use it if possible. */ - if (!__test_and_set_bit(cntx & asid_mask, context_asid_map)) + if (!__test_and_set_bit(cntx2asid(cntx), context_asid_map)) return newcntx; } @@ -168,7 +168,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) */ old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu)); if (old_active_cntx && - ((cntx & ~asid_mask) == atomic_long_read(¤t_version)) && + (cntx2version(cntx) == atomic_long_read(¤t_version)) && atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu), old_active_cntx, cntx)) goto switch_mm_fast; @@ -177,7 +177,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current_version. */ cntx = atomic_long_read(&mm->context.id); - if ((cntx & ~asid_mask) != atomic_long_read(¤t_version)) { + if (cntx2version(cntx) != atomic_long_read(¤t_version)) { cntx = __new_context(mm); atomic_long_set(&mm->context.id, cntx); } @@ -191,7 +191,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) switch_mm_fast: csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | - ((cntx & asid_mask) << SATP_ASID_SHIFT) | + (cntx2asid(cntx) << SATP_ASID_SHIFT) | satp_mode); if (need_flush_tlb) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 18af7b5053af..35266dd9a9a2 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -110,7 +110,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, static inline unsigned long get_mm_asid(struct mm_struct *mm) { return static_branch_unlikely(&use_asid_allocator) ? 
- atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; + cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; } void flush_tlb_mm(struct mm_struct *mm) -- cgit v1.2.3 From f58e5dc45fa93a2f2a0028381b2e06fe74ae9e2c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:51 -0700 Subject: riscv: mm: Use a fixed layout for the MM context ID Currently, the size of the ASID field in the MM context ID dynamically depends on the number of hardware-supported ASID bits. This requires reading a global variable to extract either field from the context ID. Instead, allocate the maximum possible number of bits to the ASID field, so the layout of the context ID is known at compile-time. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-11-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 4 ++-- arch/riscv/include/asm/tlbflush.h | 2 -- arch/riscv/mm/context.c | 6 ++---- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index a550fbf770be..dc0273f7905f 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,8 +26,8 @@ typedef struct { #endif } mm_context_t; -#define cntx2asid(cntx) ((cntx) & asid_mask) -#define cntx2version(cntx) ((cntx) & ~asid_mask) +#define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK) +#define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK) void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot); diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 8e329721375b..72e559934952 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -15,8 +15,6 @@ #define FLUSH_TLB_NO_ASID ((unsigned long)-1) #ifdef CONFIG_MMU -extern unsigned long asid_mask; - static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index b562b3c44487..5315af06cd4d 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -22,7 +22,6 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; -unsigned long asid_mask; static atomic_long_t current_version; @@ -128,7 +127,7 @@ static unsigned long __new_context(struct mm_struct *mm) goto set_asid; /* We're out of ASIDs, so increment current_version */ - ver = atomic_long_add_return_relaxed(num_asids, ¤t_version); + ver = atomic_long_add_return_relaxed(BIT(SATP_ASID_BITS), ¤t_version); /* Flush everything */ __flush_context(); @@ -247,7 +246,6 @@ static int __init asids_init(void) /* Pre-compute ASID details */ if (asid_bits) { num_asids = 1 << asid_bits; - asid_mask = num_asids - 1; } /* @@ -255,7 +253,7 @@ static int __init asids_init(void) * at-least twice more than CPUs */ if (num_asids > (2 * num_possible_cpus())) { - atomic_long_set(¤t_version, num_asids); + atomic_long_set(¤t_version, BIT(SATP_ASID_BITS)); context_asid_map = bitmap_zalloc(num_asids, GFP_KERNEL); if (!context_asid_map) -- cgit v1.2.3 From 8d3e7613f97e4c117467be126d9c0013e9937f77 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:52 -0700 Subject: riscv: mm: Make asid_bits a local variable This variable is only used inside asids_init(). 
Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-12-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/context.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 5315af06cd4d..0bf6d0070a14 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -20,7 +20,6 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); -static unsigned long asid_bits; static unsigned long num_asids; static atomic_long_t current_version; @@ -226,7 +225,7 @@ static inline void set_mm(struct mm_struct *prev, static int __init asids_init(void) { - unsigned long old; + unsigned long asid_bits, old; /* Figure-out number of ASID bits in HW */ old = csr_read(CSR_SATP); -- cgit v1.2.3 From 8fc21cc672e8ea7954fdc6b59d9ce350730e9c92 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:53 -0700 Subject: riscv: mm: Preserve global TLB entries when switching contexts If the CPU does not support multiple ASIDs, all MM contexts use ASID 0. In this case, it is still beneficial to flush the TLB by ASID, as the single-ASID variant of the sfence.vma instruction preserves TLB entries for global (kernel) pages. This optimization is recommended by the RISC-V privileged specification: If the implementation does not provide ASIDs, or software chooses to always use ASID 0, then after every satp write, software should execute SFENCE.VMA with rs1=x0. In the common case that no global translations have been modified, rs2 should be set to a register other than x0 but which contains the value zero, so that global translations are not flushed. It is not possible to apply this optimization when using the ASID allocator, because that code must flush the TLB for all ASIDs at once when incrementing the version number. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-13-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 0bf6d0070a14..60cb0b82240e 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -200,7 +200,7 @@ static void set_mm_noasid(struct mm_struct *mm) { /* Switch the page table and blindly nuke entire local TLB */ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode); - local_flush_tlb_all(); + local_flush_tlb_all_asid(0); } static inline void set_mm(struct mm_struct *prev, -- cgit v1.2.3 From daef19263fc102551d734f39d9ad417098ffb360 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:54 -0700 Subject: riscv: mm: Always use an ASID to flush mm contexts Even if multiple ASIDs are not supported, using the single-ASID variant of the sfence.vma instruction preserves TLB entries for global (kernel) pages. So it is always more efficient to use the single-ASID code path. 
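For illustration, the distinction between the two flush variants (restating the specification text quoted in the previous patch; the register choice is arbitrary):

/*
 * sfence.vma          flushes every TLB entry, including global
 *                     (kernel) translations
 * sfence.vma x0, a0   a0 holds the ASID (possibly 0); global
 *                     translations survive the flush
 *
 * Hence local_flush_tlb_all_asid(0), as used by the previous patch,
 * is cheaper than local_flush_tlb_all() even when ASID 0 is the only
 * ASID ever used.
 */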
Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-14-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 35266dd9a9a2..44e7ed4e194f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -109,8 +109,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, static inline unsigned long get_mm_asid(struct mm_struct *mm) { - return static_branch_unlikely(&use_asid_allocator) ? - cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; + return cntx2asid(atomic_long_read(&mm->context.id)); } void flush_tlb_mm(struct mm_struct *mm) -- cgit v1.2.3 From 13953e381ae2891d1699a73c20e8e7fa31699426 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 20 Mar 2024 14:47:11 +0800 Subject: riscv: Remove redundant CONFIG_64BIT from pgtable_l{4,5}_enabled IS_ENABLED(CONFIG_64BIT) in initialization of pgtable_l{4,5}_enabled is redundant, remove it. Signed-off-by: Dawei Li Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240320064712.442579-2-dawei.li@shingroup.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 968761843203..9961864850cb 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -49,8 +49,8 @@ u64 satp_mode __ro_after_init = SATP_MODE_32; EXPORT_SYMBOL(satp_mode); #ifdef CONFIG_64BIT -bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); -bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l4_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); #endif -- cgit v1.2.3 From 0fdbb06379b1126a8c69ceec28e6e506088614a2 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 20 Mar 2024 14:47:12 +0800 Subject: riscv: Annotate pgtable_l{4,5}_enabled with __ro_after_init pgtable_l{4,5}_enabled are read only after initialization, make explicit annotation of __ro_after_init on them. 
Signed-off-by: Dawei Li Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240320064712.442579-3-dawei.li@shingroup.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9961864850cb..8631b33368ec 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -49,8 +49,8 @@ u64 satp_mode __ro_after_init = SATP_MODE_32; EXPORT_SYMBOL(satp_mode); #ifdef CONFIG_64BIT -bool pgtable_l4_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); -bool pgtable_l5_enabled = !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l4_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL); +bool pgtable_l5_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL); EXPORT_SYMBOL(pgtable_l4_enabled); EXPORT_SYMBOL(pgtable_l5_enabled); #endif -- cgit v1.2.3 From dcb2743d1e701fc1a986c187adc11f6148316d21 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 25 Mar 2024 19:00:36 +0800 Subject: riscv: mm: still create swiotlb buffer for kmalloc() bouncing if required After commit f51f7a0fc2f4 ("riscv: enable DMA_BOUNCE_UNALIGNED_KMALLOC for !dma_coherent"), for non-coherent platforms with less than 4GB memory, we rely on users to pass "swiotlb=mmnn,force" kernel parameters to enable DMA bouncing for unaligned kmalloc() buffers. Now let's go further: if no bouncing is needed for ZONE_DMA, let the kernel automatically allocate a 1MB swiotlb buffer per 1GB of RAM for kmalloc() bouncing on non-coherent platforms, so that there is no need to pass "swiotlb=mmnn,force" any more. The math of "1MB swiotlb buffer per 1GB of RAM for kmalloc() bouncing" is taken from arm64. Users can still force a smaller swiotlb buffer by passing "swiotlb=mmnn". Signed-off-by: Jisheng Zhang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240325110036.1564-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cache.h | 2 +- arch/riscv/mm/init.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h index 2174fe7bac9a..570e9d8acad1 100644 --- a/arch/riscv/include/asm/cache.h +++ b/arch/riscv/include/asm/cache.h @@ -26,8 +26,8 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_RISCV_DMA_NONCOHERENT extern int dma_cache_alignment; +#ifdef CONFIG_RISCV_DMA_NONCOHERENT #define dma_get_cache_alignment dma_get_cache_alignment static inline int dma_get_cache_alignment(void) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8631b33368ec..2574f6a3b0e7 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -161,11 +161,25 @@ static void print_vm_layout(void) { } void __init mem_init(void) { + bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit); #ifdef CONFIG_FLATMEM BUG_ON(!mem_map); #endif /* CONFIG_FLATMEM */ - swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE); + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb && + dma_cache_alignment != 1) { + /* + * If no bouncing needed for ZONE_DMA, allocate 1MB swiotlb + * buffer per 1GB of RAM for kmalloc() bouncing on + * non-coherent platforms. + */ + unsigned long size = + DIV_ROUND_UP(memblock_phys_mem_size(), 1024); + swiotlb_adjust_size(min(swiotlb_size_or_default(), size)); + swiotlb = true; + } + + swiotlb_init(swiotlb, SWIOTLB_VERBOSE); memblock_free_all(); print_vm_layout(); -- cgit v1.2.3
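As a rough worked example of the sizing rule in the last patch (board parameters assumed for illustration):

/* A non-coherent platform with 4 GiB of RAM and no ZONE_DMA bouncing:
 *   memblock_phys_mem_size() == 4 GiB == 4294967296 bytes
 *   size = DIV_ROUND_UP(4294967296, 1024) == 4 MiB
 * i.e. 1 MiB of swiotlb bounce buffer per 1 GiB of RAM, capped by
 * swiotlb_size_or_default() (typically 64 MiB unless "swiotlb=" is
 * given on the command line).
 */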