From 83d8b38967d253942d9172b0c4d69b7d844d5f06 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Thu, 29 Jun 2023 20:58:43 +0800 Subject: LoongArch: Simplify the invtlb wrappers The invtlb instruction has been supported by upstream LoongArch toolchains from day one, so ditch the raw opcode trickery and just use plain inline asm for it. While at it, also make the invtlb asm statements barriers, for proper modeling of the side effects. The functions are also marked as __always_inline instead of just "inline", because they cannot work at all if not inlined: the op argument will not be compile-time const in that case, thus failing to satisfy the "i" constraint. The signature of the other more specific invtlb wrappers contain unused arguments right now, but these are not removed right away in order for the patch to be focused. In the meantime, assertions are added to ensure no accidental misuse happens before the refactor. (The more specific wrappers cannot re-use the generic invtlb wrapper, because the ISA manual says $zero shall be used in case a particular op does not take the respective argument: re-using the generic wrapper would mean losing control over the register usage.) Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/tlb.h | 43 ++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 24 deletions(-) (limited to 'arch/loongarch/include/asm/tlb.h') diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index f5e4deb97402..0ad53f1ad25d 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -88,52 +88,47 @@ enum invtlb_ops { INVTLB_GID_ADDR = 0x16, }; -/* - * invtlb op info addr - * (0x1 << 26) | (0x24 << 20) | (0x13 << 15) | - * (addr << 10) | (info << 5) | op - */ -static inline void invtlb(u32 op, u32 info, u64 addr) +static __always_inline void invtlb(u32 op, u32 info, u64 addr) { __asm__ __volatile__( - "parse_r addr,%0\n\t" - "parse_r info,%1\n\t" - ".word ((0x6498000) | (addr << 10) | (info << 5) | %2)\n\t" - : - : "r"(addr), "r"(info), "i"(op) + "invtlb %0, %1, %2\n\t" : + : "i"(op), "r"(info), "r"(addr) + : "memory" ); } -static inline void invtlb_addr(u32 op, u32 info, u64 addr) +static __always_inline void invtlb_addr(u32 op, u32 info, u64 addr) { + BUILD_BUG_ON(!__builtin_constant_p(info) || info != 0); __asm__ __volatile__( - "parse_r addr,%0\n\t" - ".word ((0x6498000) | (addr << 10) | (0 << 5) | %1)\n\t" - : - : "r"(addr), "i"(op) + "invtlb %0, $zero, %1\n\t" : + : "i"(op), "r"(addr) + : "memory" ); } -static inline void invtlb_info(u32 op, u32 info, u64 addr) +static __always_inline void invtlb_info(u32 op, u32 info, u64 addr) { + BUILD_BUG_ON(!__builtin_constant_p(addr) || addr != 0); __asm__ __volatile__( - "parse_r info,%0\n\t" - ".word ((0x6498000) | (0 << 10) | (info << 5) | %1)\n\t" - : - : "r"(info), "i"(op) + "invtlb %0, %1, $zero\n\t" : + : "i"(op), "r"(info) + : "memory" ); } -static inline void invtlb_all(u32 op, u32 info, u64 addr) +static __always_inline void invtlb_all(u32 op, u32 info, u64 addr) { + BUILD_BUG_ON(!__builtin_constant_p(info) || info != 0); + BUILD_BUG_ON(!__builtin_constant_p(addr) || addr != 0); __asm__ __volatile__( - ".word ((0x6498000) | (0 << 10) | (0 << 5) | %0)\n\t" + "invtlb %0, $zero, $zero\n\t" : : "i"(op) - : + : "memory" ); } -- cgit v1.2.3 From 01158487af60cd3915e8c31924144caf29cb0767 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 29 Jun 2023 20:58:44 +0800 Subject: LoongArch: Introduce hardware page table walker Loongson-3A6000 and newer processors have hardware page table walker (PTW) support. PTW can handle all fastpaths of TLBI/TLBL/TLBS/TLBM exceptions by hardware, software only need to handle slowpaths (page faults). BTW, PTW doesn't append _PAGE_MODIFIED for page table entries, so we change pmd_dirty() and pte_dirty() to also check _PAGE_DIRTY for the "dirty" attribute. Signed-off-by: Liang Gao Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 2 +- arch/loongarch/include/asm/cpu.h | 2 ++ arch/loongarch/include/asm/loongarch.h | 4 ++++ arch/loongarch/include/asm/pgtable.h | 4 ++-- arch/loongarch/include/asm/tlb.h | 3 +++ arch/loongarch/include/uapi/asm/hwcap.h | 1 + arch/loongarch/kernel/cpu-probe.c | 4 ++++ arch/loongarch/kernel/proc.c | 1 + arch/loongarch/mm/tlb.c | 21 +++++++++++++++++---- arch/loongarch/mm/tlbex.S | 21 +++++++++++++++++++++ 10 files changed, 56 insertions(+), 7 deletions(-) (limited to 'arch/loongarch/include/asm/tlb.h') diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index f6177f133477..2eafe6a6aca8 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -64,6 +64,6 @@ #define cpu_has_eiodecode cpu_opt(LOONGARCH_CPU_EIODECODE) #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) - +#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 88773d849e33..48b9f7168bcc 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -98,6 +98,7 @@ enum cpu_type_enum { #define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ #define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ #define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -125,5 +126,6 @@ enum cpu_type_enum { #define LOONGARCH_CPU_EIODECODE BIT_ULL(CPU_FEATURE_EIODECODE) #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) +#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 08c77d065a11..1ab1ed28d770 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -135,6 +135,7 @@ __asm__(".macro parse_r var r\n\t" #define CPUCFG2_MIPSBT BIT(20) #define CPUCFG2_LSPW BIT(21) #define CPUCFG2_LAM BIT(22) +#define CPUCFG2_PTW BIT(24) #define LOONGARCH_CPUCFG3 0x3 #define CPUCFG3_CCDMA BIT(0) @@ -412,6 +413,9 @@ __asm__(".macro parse_r var r\n\t" #define CSR_PWCTL0_PTBASE (_ULCAST_(0x1f) << CSR_PWCTL0_PTBASE_SHIFT) #define LOONGARCH_CSR_PWCTL1 0x1d /* PWCtl1 */ +#define CSR_PWCTL1_PTW_SHIFT 24 +#define CSR_PWCTL1_PTW_WIDTH 1 +#define CSR_PWCTL1_PTW (_ULCAST_(0x1) << CSR_PWCTL1_PTW_SHIFT) #define CSR_PWCTL1_DIR3WIDTH_SHIFT 18 #define CSR_PWCTL1_DIR3WIDTH_WIDTH 5 #define CSR_PWCTL1_DIR3WIDTH (_ULCAST_(0x1f) << CSR_PWCTL1_DIR3WIDTH_SHIFT) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 9a9f9ff9b709..38afeb7dd58b 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -362,7 +362,7 @@ extern pgd_t invalid_pg_dir[]; */ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & (_PAGE_DIRTY | _PAGE_MODIFIED); } static inline pte_t pte_mkold(pte_t pte) { @@ -506,7 +506,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline int pmd_dirty(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_MODIFIED); + return !!(pmd_val(pmd) & (_PAGE_DIRTY | _PAGE_MODIFIED)); } static inline pmd_t pmd_mkclean(pmd_t pmd) diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index 0ad53f1ad25d..da7a3b5b9374 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -158,6 +158,9 @@ extern void handle_tlb_store(void); extern void handle_tlb_modify(void); extern void handle_tlb_refill(void); extern void handle_tlb_protect(void); +extern void handle_tlb_load_ptw(void); +extern void handle_tlb_store_ptw(void); +extern void handle_tlb_modify_ptw(void); extern void dump_tlb_all(void); extern void dump_tlb_regs(void); diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 8840b72fa8e8..6955a7cb2c65 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -16,5 +16,6 @@ #define HWCAP_LOONGARCH_LBT_X86 (1 << 10) #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) +#define HWCAP_LOONGARCH_PTW (1 << 13) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index f42acc6c8df6..e925579c7a71 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -136,6 +136,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_CRYPTO; elf_hwcap |= HWCAP_LOONGARCH_CRYPTO; } + if (config & CPUCFG2_PTW) { + c->options |= LOONGARCH_CPU_PTW; + elf_hwcap |= HWCAP_LOONGARCH_PTW; + } if (config & CPUCFG2_LVZP) { c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index d4b270630bb5..0d33cbc47e51 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -80,6 +80,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_crc32) seq_printf(m, " crc32"); if (cpu_has_complex) seq_printf(m, " complex"); if (cpu_has_crypto) seq_printf(m, " crypto"); + if (cpu_has_ptw) seq_printf(m, " ptw"); if (cpu_has_lvz) seq_printf(m, " lvz"); if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86"); if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm"); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 8bad6b0cff59..00bb563e3c89 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -167,6 +167,9 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep int idx; unsigned long flags; + if (cpu_has_ptw) + return; + /* * Handle debugger faulting in for debugee. */ @@ -222,6 +225,9 @@ static void setup_ptwalker(void) pwctl0 = pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15 | pud_i << 20 | pud_w << 25; pwctl1 = pgd_i | pgd_w << 6; + if (cpu_has_ptw) + pwctl1 |= CSR_PWCTL1_PTW; + csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0); csr_write64(pwctl1, LOONGARCH_CSR_PWCTL1); csr_write64((long)swapper_pg_dir, LOONGARCH_CSR_PGDH); @@ -264,10 +270,17 @@ void setup_tlb_handler(int cpu) if (cpu == 0) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + if (!cpu_has_ptw) { + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + } else { + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE); + } set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 240ced55586e..4ad78703de6f 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -190,6 +190,13 @@ nopage_tlb_load: jr t0 SYM_FUNC_END(handle_tlb_load) +SYM_FUNC_START(handle_tlb_load_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la_abs t0, tlb_do_page_fault_0 + jr t0 +SYM_FUNC_END(handle_tlb_load_ptw) + SYM_FUNC_START(handle_tlb_store) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 @@ -339,6 +346,13 @@ nopage_tlb_store: jr t0 SYM_FUNC_END(handle_tlb_store) +SYM_FUNC_START(handle_tlb_store_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la_abs t0, tlb_do_page_fault_1 + jr t0 +SYM_FUNC_END(handle_tlb_store_ptw) + SYM_FUNC_START(handle_tlb_modify) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 @@ -486,6 +500,13 @@ nopage_tlb_modify: jr t0 SYM_FUNC_END(handle_tlb_modify) +SYM_FUNC_START(handle_tlb_modify_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la_abs t0, tlb_do_page_fault_1 + jr t0 +SYM_FUNC_END(handle_tlb_modify_ptw) + SYM_FUNC_START(handle_tlb_refill) csrwr t0, LOONGARCH_CSR_TLBRSAVE csrrd t0, LOONGARCH_CSR_PGD -- cgit v1.2.3