From 4cc3daaf3976bde5345718e86b67cace2f935a09 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 14 Jan 2013 20:48:55 +0000
Subject: ARM: tlbflush: remove ARMv3 support

We no longer support any ARMv3 platforms, so remove the old TLB flushing
code.

Signed-off-by: Will Deacon
---
 arch/arm/include/asm/tlbflush.h | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'arch/arm/include/asm/tlbflush.h')

diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 4db8c8820f0d..a223003b0f17 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -14,7 +14,6 @@
 
 #include
 
-#define TLB_V3_PAGE	(1 << 0)
 #define TLB_V4_U_PAGE	(1 << 1)
 #define TLB_V4_D_PAGE	(1 << 2)
 #define TLB_V4_I_PAGE	(1 << 3)
@@ -22,7 +21,6 @@
 #define TLB_V6_D_PAGE	(1 << 5)
 #define TLB_V6_I_PAGE	(1 << 6)
 
-#define TLB_V3_FULL	(1 << 8)
 #define TLB_V4_U_FULL	(1 << 9)
 #define TLB_V4_D_FULL	(1 << 10)
 #define TLB_V4_I_FULL	(1 << 11)
@@ -52,7 +50,6 @@
  *	=============
  *
  *	We have the following to choose from:
- *	  v3    - ARMv3
  *	  v4    - ARMv4 without write buffer
  *	  v4wb  - ARMv4 with write buffer without I TLB flush entry instruction
  *	  v4wbi - ARMv4 with write buffer with I TLB flush entry instruction
@@ -330,7 +327,6 @@ static inline void local_flush_tlb_all(void)
 	if (tlb_flag(TLB_WB))
 		dsb();
 
-	tlb_op(TLB_V3_FULL, "c6, c0, 0", zero);
 	tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero);
 	tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero);
 	tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero);
@@ -351,9 +347,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 	if (tlb_flag(TLB_WB))
 		dsb();
 
-	if (possible_tlb_flags & (TLB_V3_FULL|TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) {
+	if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) {
 		if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) {
-			tlb_op(TLB_V3_FULL, "c6, c0, 0", zero);
 			tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero);
 			tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero);
 			tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero);
@@ -385,9 +380,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 	if (tlb_flag(TLB_WB))
 		dsb();
 
-	if (possible_tlb_flags & (TLB_V3_PAGE|TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) &&
+	if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) &&
 	    cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
-		tlb_op(TLB_V3_PAGE, "c6, c0, 0", uaddr);
 		tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr);
 		tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", uaddr);
 		tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", uaddr);
@@ -418,7 +412,6 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 	if (tlb_flag(TLB_WB))
 		dsb();
 
-	tlb_op(TLB_V3_PAGE, "c6, c0, 0", kaddr);
 	tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr);
 	tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr);
 	tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr);
-- 
cgit v1.2.3


From 93dc68876b608da041fe40ed39424b0fcd5aa2fb Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Tue, 26 Mar 2013 23:35:04 +0100
Subject: ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations)

On Cortex-A15 (r0p0..r3p2) the TLBI/DSB operations are not adequately
shooting down all use of the old entries. This patch implements the
erratum workaround which consists of:

1. Dummy TLBIMVAIS and DSB on the CPU doing the TLBI operation.
2. Send IPI to the CPUs that are running the same mm (and ASID) as the
   one being invalidated (or all the online CPUs for global pages).
3. CPU receiving the IPI executes a DMB and CLREX (part of the exception
   return code already).

Signed-off-by: Catalin Marinas
Signed-off-by: Russell King
---
 arch/arm/Kconfig                   | 10 ++++++
 arch/arm/include/asm/highmem.h     |  7 ++++
 arch/arm/include/asm/mmu_context.h |  2 ++
 arch/arm/include/asm/tlbflush.h    | 15 +++++++++
 arch/arm/kernel/smp_tlb.c          | 66 ++++++++++++++++++++++++++++++++++++++
 arch/arm/mm/context.c              |  3 +-
 6 files changed, 102 insertions(+), 1 deletion(-)

(limited to 'arch/arm/include/asm/tlbflush.h')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 12ea3b3d49a9..1cacda426a0e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1439,6 +1439,16 @@ config ARM_ERRATA_775420
 	  to deadlock. This workaround puts DSB before executing ISB if
 	  an abort may occur on cache maintenance.
 
+config ARM_ERRATA_798181
+	bool "ARM errata: TLBI/DSB failure on Cortex-A15"
+	depends on CPU_V7 && SMP
+	help
+	  On Cortex-A15 (r0p0..r3p2) the TLBI*IS/DSB operations are not
+	  adequately shooting down all use of the old entries. This
+	  option enables the Linux kernel workaround for this erratum
+	  which sends an IPI to the CPUs that are running the same ASID
+	  as the one being invalidated.
+
 endmenu
 
 source "arch/arm/common/Kconfig"
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 8c5e828f484d..91b99abe7a95 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -41,6 +41,13 @@ extern void kunmap_high(struct page *page);
 #endif
 #endif
 
+/*
+ * Needed to be able to broadcast the TLB invalidation for kmap.
+ */
+#ifdef CONFIG_ARM_ERRATA_798181
+#undef ARCH_NEEDS_KMAP_HIGH_GET
+#endif
+
 #ifdef ARCH_NEEDS_KMAP_HIGH_GET
 extern void *kmap_high_get(struct page *page);
 #else
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 863a6611323c..a7b85e0d0cc1 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -27,6 +27,8 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
 #define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
+DECLARE_PER_CPU(atomic64_t, active_asids);
+
 #else	/* !CONFIG_CPU_HAS_ASID */
 
 #ifdef CONFIG_MMU
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 4db8c8820f0d..9e9c041358ca 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -450,6 +450,21 @@ static inline void local_flush_bp_all(void)
 		isb();
 }
 
+#ifdef CONFIG_ARM_ERRATA_798181
+static inline void dummy_flush_tlb_a15_erratum(void)
+{
+	/*
+	 * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0.
+	 */
+	asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0));
+	dsb();
+}
+#else
+static inline void dummy_flush_tlb_a15_erratum(void)
+{
+}
+#endif
+
 /*
  *	flush_pmd_entry
  *
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index bd0300531399..e82e1d248772 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -12,6 +12,7 @@
 #include
 
 #include
+#include
 
 /**********************************************************************/
 
@@ -69,12 +70,72 @@ static inline void ipi_flush_bp_all(void *ignored)
 	local_flush_bp_all();
 }
 
+#ifdef CONFIG_ARM_ERRATA_798181
+static int erratum_a15_798181(void)
+{
+	unsigned int midr = read_cpuid_id();
+
+	/* Cortex-A15 r0p0..r3p2 affected */
+	if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2)
+		return 0;
+	return 1;
+}
+#else
+static int erratum_a15_798181(void)
+{
+	return 0;
+}
+#endif
+
+static void ipi_flush_tlb_a15_erratum(void *arg)
+{
+	dmb();
+}
+
+static void broadcast_tlb_a15_erratum(void)
+{
+	if (!erratum_a15_798181())
+		return;
+
+	dummy_flush_tlb_a15_erratum();
+	smp_call_function_many(cpu_online_mask, ipi_flush_tlb_a15_erratum,
+			       NULL, 1);
+}
+
+static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
+{
+	int cpu;
+	cpumask_t mask = { CPU_BITS_NONE };
+
+	if (!erratum_a15_798181())
+		return;
+
+	dummy_flush_tlb_a15_erratum();
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+		/*
+		 * We only need to send an IPI if the other CPUs are running
+		 * the same ASID as the one being invalidated. There is no
+		 * need for locking around the active_asids check since the
+		 * switch_mm() function has at least one dmb() (as required by
+		 * this workaround) in case a context switch happens on
+		 * another CPU after the condition below.
+		 */
+		if (atomic64_read(&mm->context.id) ==
+		    atomic64_read(&per_cpu(active_asids, cpu)))
+			cpumask_set_cpu(cpu, &mask);
+	}
+	smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
+}
+
 void flush_tlb_all(void)
 {
 	if (tlb_ops_need_broadcast())
 		on_each_cpu(ipi_flush_tlb_all, NULL, 1);
 	else
 		local_flush_tlb_all();
+	broadcast_tlb_a15_erratum();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
@@ -83,6 +144,7 @@ void flush_tlb_mm(struct mm_struct *mm)
 		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
 	else
 		local_flush_tlb_mm(mm);
+	broadcast_tlb_mm_a15_erratum(mm);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
@@ -95,6 +157,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 					&ta, 1);
 	} else
 		local_flush_tlb_page(vma, uaddr);
+	broadcast_tlb_mm_a15_erratum(vma->vm_mm);
 }
 
 void flush_tlb_kernel_page(unsigned long kaddr)
@@ -105,6 +168,7 @@ void flush_tlb_kernel_page(unsigned long kaddr)
 		on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
 	} else
 		local_flush_tlb_kernel_page(kaddr);
+	broadcast_tlb_a15_erratum();
 }
 
 void flush_tlb_range(struct vm_area_struct *vma,
@@ -119,6 +183,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
 					&ta, 1);
 	} else
 		local_flush_tlb_range(vma, start, end);
+	broadcast_tlb_mm_a15_erratum(vma->vm_mm);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
@@ -130,6 +195,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
 	} else
 		local_flush_tlb_kernel_range(start, end);
+	broadcast_tlb_a15_erratum();
 }
 
 void flush_bp_all(void)
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index a5a4b2bc42ba..2ac37372ef52 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -48,7 +48,7 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
 static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
 
-static DEFINE_PER_CPU(atomic64_t, active_asids);
+DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
 
@@ -215,6 +215,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
 		local_flush_bp_all();
 		local_flush_tlb_all();
+		dummy_flush_tlb_a15_erratum();
 	}
 
 	atomic64_set(&per_cpu(active_asids, cpu), asid);
-- 
cgit v1.2.3


From ae8a8b9553bd3906af74ff4e8d763904d20ab4e5 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 3 Apr 2013 17:16:57 +0100
Subject: ARM: 7691/1: mm: kill unused TLB_CAN_READ_FROM_L1_CACHE and use ALT_SMP instead

Many ARMv7 cores have hardware page table walkers that can read the L1
cache. This is discoverable from the ID_MMFR3 register, although this
can be expensive to access from the low-level set_pte functions and is
a pain to cache, particularly with multi-cluster systems.

A useful observation is that the multi-processing extensions for ARMv7
require coherent table walks, meaning that we can make use of ALT_SMP
patching in proc-v7-* to patch away the cache flush safely for these
cores.

Reported-by: Albin Tonnerre
Reviewed-by: Catalin Marinas
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/include/asm/tlbflush.h | 2 +-
 arch/arm/mm/proc-v6.S           | 2 --
 arch/arm/mm/proc-v7-2level.S    | 3 ++-
 arch/arm/mm/proc-v7-3level.S    | 3 ++-
 arch/arm/mm/proc-v7.S           | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/arm/include/asm/tlbflush.h')

diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 4db8c8820f0d..7a3e48dceb84 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -169,7 +169,7 @@
 # define v6wbi_always_flags	(-1UL)
 #endif
 
-#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
+#define v7wbi_tlb_flags_smp	(TLB_WB | TLB_BARRIER | \
 				 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | \
 				 TLB_V7_UIS_ASID | TLB_V7_UIS_BP)
 #define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index bcaaa8de9325..a286d4712b57 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -80,12 +80,10 @@ ENTRY(cpu_v6_do_idle)
 	mov	pc, lr
 
 ENTRY(cpu_v6_dcache_clean_area)
-#ifndef TLB_CAN_READ_FROM_L1_CACHE
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, #D_CACHE_LINE_SIZE
 	subs	r1, r1, #D_CACHE_LINE_SIZE
 	bhi	1b
-#endif
 	mov	pc, lr
 
 /*
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 78f520bc0e99..9704097c450e 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -110,7 +110,8 @@ ENTRY(cpu_v7_set_pte_ext)
 ARM(	str	r3, [r0, #2048]!	)
 THUMB(	add	r0, r0, #2048		)
 THUMB(	str	r3, [r0]		)
-	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
+	ALT_SMP(mov	pc,lr)
+	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)	@ flush_pte
 #endif
 	mov	pc, lr
 ENDPROC(cpu_v7_set_pte_ext)
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 6ffd78c0f9ab..363027e811d6 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -73,7 +73,8 @@ ENTRY(cpu_v7_set_pte_ext)
 	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY
 	orreq	r2, #L_PTE_RDONLY
1:	strd	r2, r3, [r0]
-	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
+	ALT_SMP(mov	pc, lr)
+	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
 	mov	pc, lr
 ENDPROC(cpu_v7_set_pte_ext)
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3a3c015f8d5c..37716b0508e1 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -75,14 +75,14 @@ ENTRY(cpu_v7_do_idle)
 ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
-#ifndef TLB_CAN_READ_FROM_L1_CACHE
+	ALT_SMP(mov	pc, lr)			@ MP extensions imply L1 PTW
+	ALT_UP(W(nop))
 	dcache_line_size r2, r3
1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, r2
 	subs	r1, r1, r2
 	bhi	1b
 	dsb
-#endif
 	mov	pc, lr
 ENDPROC(cpu_v7_dcache_clean_area)
-- 
cgit v1.2.3
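
Note on the revision check in the 798181 patch above: erratum_a15_798181() compares the raw
MIDR value against the constants 0x410fc0f0 (masked) and 0x413fc0f2. The standalone sketch
below (plain user-space C, not part of any of the patches; the function name and the example
values in main() are illustrative only) decodes the MIDR fields that those constants encode,
assuming the standard ARMv7 MIDR layout: implementer [31:24], variant [23:20], architecture
[19:16], primary part number [15:4], revision [3:0].

#include <stdio.h>

/*
 * Illustrative only: decode the MIDR fields selected by the 0xff0ffff0
 * mask in erratum_a15_798181() and apply the same r0p0..r3p2 bound.
 */
static int a15_midr_affected(unsigned int midr)
{
	unsigned int implementer = (midr >> 24) & 0xff; /* 0x41 == ARM Ltd.      */
	unsigned int variant     = (midr >> 20) & 0xf;  /* major revision: rN    */
	unsigned int arch        = (midr >> 16) & 0xf;  /* 0xf == CPUID scheme   */
	unsigned int part        = (midr >> 4) & 0xfff; /* 0xc0f == Cortex-A15   */
	unsigned int revision    = midr & 0xf;          /* minor revision: pN    */

	if (implementer != 0x41 || arch != 0xf || part != 0xc0f)
		return 0;
	/* r0p0..r3p2: up to variant 3, revision 2, i.e. midr <= 0x413fc0f2 */
	return variant < 3 || (variant == 3 && revision <= 2);
}

int main(void)
{
	printf("r3p2 (0x413fc0f2): %d\n", a15_midr_affected(0x413fc0f2)); /* 1: affected   */
	printf("r4p0 (0x414fc0f0): %d\n", a15_midr_affected(0x414fc0f0)); /* 0: not listed */
	return 0;
}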