From 4ab07fd3fbe6b6fe5f28e0e2987eb02bedc2b495 Mon Sep 17 00:00:00 2001 From: Alex Sverdlin Date: Tue, 27 Sep 2022 13:08:37 +0100 Subject: ARM: 9252/1: module: Teach unwinder about PLTs "unwind: Index not found eef26358" warnings keep popping up on CONFIG_ARM_MODULE_PLTS-enabled systems if the PC points to a PLT veneer. Teach the unwinder how to deal with them, taking into account they don't change state of the stack or register file except loading PC. Link: https://lore.kernel.org/linux-arm-kernel/20200402153845.30985-1-kursad.oney@broadcom.com/ Tested-by: Kursad Oney Reviewed-by: Linus Walleij Signed-off-by: Alexander Sverdlin Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/module.h | 5 +++++ arch/arm/kernel/module-plts.c | 14 ++++++++++++++ arch/arm/kernel/unwind.c | 13 ++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 5546c9751478..07c51a34f77d 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -37,6 +37,11 @@ struct mod_arch_specific { struct module; u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val); +#ifdef CONFIG_ARM_MODULE_PLTS +bool in_module_plt(unsigned long loc); +#else +static inline bool in_module_plt(unsigned long loc) { return false; } +#endif #ifdef CONFIG_THUMB2_KERNEL #define HAVE_ARCH_KALLSYMS_SYMBOL_VALUE diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index 1fc309b41f94..af7c322ebed6 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -284,3 +284,17 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size); return 0; } + +bool in_module_plt(unsigned long loc) +{ + struct module *mod; + bool ret; + + preempt_disable(); + mod = __module_text_address(loc); + ret = mod && (loc - (u32)mod->arch.core.plt_ent < mod->arch.core.plt_count * PLT_ENT_SIZE || + loc - (u32)mod->arch.init.plt_ent < mod->arch.init.plt_count * PLT_ENT_SIZE); + preempt_enable(); + + return ret; +} diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index a37ea6c772cd..53be7ea6181b 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -395,8 +396,18 @@ int unwind_frame(struct stackframe *frame) idx = unwind_find_idx(frame->pc); if (!idx) { - if (frame->pc && kernel_text_address(frame->pc)) + if (frame->pc && kernel_text_address(frame->pc)) { + if (in_module_plt(frame->pc) && frame->pc != frame->lr) { + /* + * Quoting Ard: Veneers only set PC using a + * PC+immediate LDR, and so they don't affect + * the state of the stack or the register file + */ + frame->pc = frame->lr; + return URC_OK; + } pr_warn("unwind: Index not found %08lx\n", frame->pc); + } return -URC_FAILURE; } -- cgit v1.2.3 From d539fee9f825b0c8eac049732c83562b28a483b5 Mon Sep 17 00:00:00 2001 From: Seung-Woo Kim Date: Fri, 30 Sep 2022 22:30:09 +0100 Subject: ARM: 9253/1: ubsan: select ARCH_HAS_UBSAN_SANITIZE_ALL To enable UBSAN on ARM, this patch enables ARCH_HAS_UBSAN_SANITIZE_ALL from arm confiuration. Basic kernel bootup test is passed on arm with CONFIG_UBSAN_SANITIZE_ALL enabled. [florian: rebased against v6.0-rc7] Signed-off-by: Seung-Woo Kim Signed-off-by: Florian Fainelli Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 1 + arch/arm/boot/compressed/Makefile | 1 + arch/arm/vdso/Makefile | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 87badeae3181..c90aa58eab7f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -27,6 +27,7 @@ config ARM select ARCH_HAVE_NMI_SAFE_CMPXCHG if CPU_V7 || CPU_V7M || CPU_V6K select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_KEEP_MEMBLOCK + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 41bcbb460fac..2cc2af13779e 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -27,6 +27,7 @@ KASAN_SANITIZE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n +UBSAN_SANITIZE := n # # Architecture dependencies diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8ca1c9f262a2..a7ec06ce3785 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -37,6 +37,7 @@ endif # Disable gcov profiling for VDSO code GCOV_PROFILE := n +UBSAN_SANITIZE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n -- cgit v1.2.3 From b40b84b120f5cb5f8c6de0b404ffb68cf72b7c90 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Tue, 11 Oct 2022 02:34:05 +0100 Subject: ARM: 9254/1: mm: Provide better message when kernel fault If there is a kernel fault, see do_kernel_fault(), we only print the generic "paging request" or "NULL pointer dereference" message which don't show read, write or excute information, let's provide better fault message for them. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- arch/arm/mm/fault.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 46cccd6bf705..f8fe0ec64c23 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -111,8 +111,9 @@ static void die_kernel_fault(const char *msg, struct mm_struct *mm, { bust_spinlocks(1); pr_alert("8<--- cut here ---\n"); - pr_alert("Unable to handle kernel %s at virtual address %08lx\n", - msg, addr); + pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n", + msg, addr, fsr & FSR_LNX_PF ? "execute" : + fsr & FSR_WRITE ? "write" : "read"); show_pte(KERN_ALERT, mm, addr); die("Oops", regs, fsr); -- cgit v1.2.3 From e513ffd881055c17ccdcc50f279360e2e1bffb40 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Tue, 11 Oct 2022 02:35:07 +0100 Subject: ARM: 9255/1: efi/dump UEFI runtime page tables for ARM UEFI runtime page tables dump only for ARM64 at present, but ARM support EFI and ARM_PTDUMP_DEBUGFS now. Since ARM could potentially execute with a 1G/3G user/kernel split, choosing 1G as the upper limit for UEFI runtime end, with this, we could enable UEFI runtime page tables on ARM. Acked-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/ptdump.h | 1 + arch/arm64/include/asm/ptdump.h | 1 + drivers/firmware/efi/arm-runtime.c | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/ptdump.h b/arch/arm/include/asm/ptdump.h index 0c2d3d0d4cc6..aad1d034136c 100644 --- a/arch/arm/include/asm/ptdump.h +++ b/arch/arm/include/asm/ptdump.h @@ -21,6 +21,7 @@ struct ptdump_info { void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_ARM_PTDUMP_DEBUGFS +#define EFI_RUNTIME_MAP_END SZ_1G void ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index b1dd7ecff7ef..581caac525b0 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -23,6 +23,7 @@ struct ptdump_info { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_PTDUMP_DEBUGFS +#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 3359ae2adf24..8f8ae479061b 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -25,14 +25,14 @@ #include #include -#if defined(CONFIG_PTDUMP_DEBUGFS) && defined(CONFIG_ARM64) +#if defined(CONFIG_PTDUMP_DEBUGFS) || defined(CONFIG_ARM_PTDUMP_DEBUGFS) #include static struct ptdump_info efi_ptdump_info = { .mm = &efi_mm, .markers = (struct addr_marker[]){ { 0, "UEFI runtime start" }, - { DEFAULT_MAP_WINDOW_64, "UEFI runtime end" }, + { EFI_RUNTIME_MAP_END, "UEFI runtime end" }, { -1, NULL } }, .base_addr = 0, -- cgit v1.2.3 From 3220022038b9a3845eea762af85f1c5694b9f861 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 11 Oct 2022 20:00:12 +0100 Subject: ARM: 9256/1: NWFPE: avoid compiler-generated __aeabi_uldivmod clang-15's ability to elide loops completely became more aggressive when it can deduce how a variable is being updated in a loop. Counting down one variable by an increment of another can be replaced by a modulo operation. For 64b variables on 32b ARM EABI targets, this can result in the compiler generating calls to __aeabi_uldivmod, which it does for a do while loop in float64_rem(). For the kernel, we'd generally prefer that developers not open code 64b division via binary / operators and instead use the more explicit helpers from div64.h. On arm-linux-gnuabi targets, failure to do so can result in linkage failures due to undefined references to __aeabi_uldivmod(). While developers can avoid open coding divisions on 64b variables, the compiler doesn't know that the Linux kernel has a partial implementation of a compiler runtime (--rtlib) to enforce this convention. It's also undecidable for the compiler whether the code in question would be faster to execute the loop vs elide it and do the 64b division. While I actively avoid using the internal -mllvm command line flags, I think we get better code than using barrier() here, which will force reloads+spills in the loop for all toolchains. Link: https://github.com/ClangBuiltLinux/linux/issues/1666 Reported-by: Nathan Chancellor Reviewed-by: Arnd Bergmann Signed-off-by: Nick Desaulniers Tested-by: Nathan Chancellor Cc: stable@vger.kernel.org Signed-off-by: Russell King (Oracle) --- arch/arm/nwfpe/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/nwfpe/Makefile b/arch/arm/nwfpe/Makefile index 303400fa2cdf..2aec85ab1e8b 100644 --- a/arch/arm/nwfpe/Makefile +++ b/arch/arm/nwfpe/Makefile @@ -11,3 +11,9 @@ nwfpe-y += fpa11.o fpa11_cpdo.o fpa11_cpdt.o \ entry.o nwfpe-$(CONFIG_FPE_NWFPE_XP) += extended_cpdo.o + +# Try really hard to avoid generating calls to __aeabi_uldivmod() from +# float64_rem() due to loop elision. +ifdef CONFIG_CC_IS_CLANG +CFLAGS_softfloat.o += -mllvm -replexitval=never +endif -- cgit v1.2.3 From 8fc0b333a7dd9b98040096296ba591a3ac3e12c0 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Mon, 17 Oct 2022 20:41:30 +0100 Subject: ARM: 9257/1: Disable FIQs (but not IRQs) on CPUs shutdown paths Currently the regular CPU shutdown path for ARM disables IRQs/FIQs in the secondary CPUs - smp_send_stop() calls ipi_cpu_stop(), which is responsible for that. IRQs are architecturally masked when we take an interrupt, but FIQs are high priority than IRQs, hence they aren't masked. With that said, it makes sense to disable FIQs here, but there's no need for (re-)disabling IRQs. More than that: there is an alternative path for disabling CPUs, in the form of function crash_smp_send_stop(), which is used for kexec/panic path. This function relies on a SMP call that also triggers a busy-wait loop [at machine_crash_nonpanic_core()], but without disabling FIQs. This might lead to odd scenarios, like early interrupts in the boot of kexec'd kernel or even interrupts in secondary "disabled" CPUs while the main one still works in the panic path and assumes all secondary CPUs are (really!) off. So, let's disable FIQs in both paths and *not* disable IRQs a second time, since they are already masked in both paths by the architecture. This way, we keep both CPU quiesce paths consistent and safe. Cc: Marc Zyngier Cc: Michael Kelley Signed-off-by: Guilherme G. Piccoli Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/machine_kexec.c | 2 ++ arch/arm/kernel/smp.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index f567032a09c0..0b482bcb97f7 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -77,6 +77,8 @@ void machine_crash_nonpanic_core(void *unused) { struct pt_regs regs; + local_fiq_disable(); + crash_setup_regs(®s, get_irq_regs()); printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", smp_processor_id()); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 978db2d96b44..36e6efad89f3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -600,6 +600,8 @@ static DEFINE_RAW_SPINLOCK(stop_lock); */ static void ipi_cpu_stop(unsigned int cpu) { + local_fiq_disable(); + if (system_state <= SYSTEM_RUNNING) { raw_spin_lock(&stop_lock); pr_crit("CPU%u: stopping\n", cpu); @@ -609,9 +611,6 @@ static void ipi_cpu_stop(unsigned int cpu) set_cpu_online(cpu, false); - local_fiq_disable(); - local_irq_disable(); - while (1) { cpu_relax(); wfe(); -- cgit v1.2.3 From 527d08631b30fb99721274eab8ebabd9c54b0b25 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 18 Oct 2022 20:50:59 +0100 Subject: ARM: 9260/1: lib/xor: use r10 rather than r7 in xor_arm4regs_{2|3} kbuild test robot reports: In file included from crypto/xor.c:17: ./arch/arm/include/asm/xor.h:61:3: error: write to reserved register 'R7' GET_BLOCK_4(p1); ^ ./arch/arm/include/asm/xor.h:20:10: note: expanded from macro 'GET_BLOCK_4' __asm__("ldmia %0, {%1, %2, %3, %4}" ^ ./arch/arm/include/asm/xor.h:63:3: error: write to reserved register 'R7' PUT_BLOCK_4(p1); ^ ./arch/arm/include/asm/xor.h:42:23: note: expanded from macro 'PUT_BLOCK_4' __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" ^ ./arch/arm/include/asm/xor.h:83:3: error: write to reserved register 'R7' GET_BLOCK_4(p1); ^ ./arch/arm/include/asm/xor.h:20:10: note: expanded from macro 'GET_BLOCK_4' __asm__("ldmia %0, {%1, %2, %3, %4}" ^ ./arch/arm/include/asm/xor.h:86:3: error: write to reserved register 'R7' PUT_BLOCK_4(p1); ^ ./arch/arm/include/asm/xor.h:42:23: note: expanded from macro 'PUT_BLOCK_4' __asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" ^ Thumb2 uses r7 rather than r11 as the frame pointer. Let's use r10 rather than r7 for these temporaries. Link: https://github.com/ClangBuiltLinux/linux/issues/1732 Link: https://lore.kernel.org/llvm/202210072120.V1O2SuKY-lkp@intel.com/ Reported-by: kernel test robot Suggested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/xor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h index 669cad5194d3..934b549905f5 100644 --- a/arch/arm/include/asm/xor.h +++ b/arch/arm/include/asm/xor.h @@ -51,7 +51,7 @@ xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1, register unsigned int a1 __asm__("r4"); register unsigned int a2 __asm__("r5"); register unsigned int a3 __asm__("r6"); - register unsigned int a4 __asm__("r7"); + register unsigned int a4 __asm__("r10"); register unsigned int b1 __asm__("r8"); register unsigned int b2 __asm__("r9"); register unsigned int b3 __asm__("ip"); @@ -73,7 +73,7 @@ xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1, register unsigned int a1 __asm__("r4"); register unsigned int a2 __asm__("r5"); register unsigned int a3 __asm__("r6"); - register unsigned int a4 __asm__("r7"); + register unsigned int a4 __asm__("r10"); register unsigned int b1 __asm__("r8"); register unsigned int b2 __asm__("r9"); register unsigned int b3 __asm__("ip"); -- cgit v1.2.3 From 5aa4860eb50f3700e3175759f608bcfb68d0d6ae Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 24 Oct 2022 20:43:47 +0100 Subject: ARM: 9262/1: remove lazy evaluation in Makefile arch-y and tune-y used lazy evaluation since they used to contain cc-option checks. They don't any longer, so just eagerly evaluate these command line flags. Reviewed-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Russell King (Oracle) --- arch/arm/Makefile | 60 +++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 33 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 44b240d6ed30..34bf50402f82 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -63,44 +63,38 @@ KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 -march=armv7-a -arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 -march=armv6 -# Only override the compiler option if ARMv6. The ARMv6K extensions are +arch-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 -march=armv7-m +arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 -march=armv7-a +arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 -march=armv6 +# Only override the compiler opt:ion if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 -march=armv6k +arch-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 -march=armv6k endif -arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 -march=armv5te -arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t -arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 -arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m - -# Evaluate arch cc-option calls now -arch-y := $(arch-y) +arch-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 -march=armv5te +arch-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 -march=armv4t +arch-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 -march=armv4 +arch-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 -march=armv3m # This selects how we optimise for the processor. -tune-$(CONFIG_CPU_ARM7TDMI) =-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi -tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) =-mtune=arm9e -tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110 -tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) =-mtune=xscale -tune-$(CONFIG_CPU_XSC3) =-mtune=xscale -tune-$(CONFIG_CPU_FEROCEON) =-mtune=xscale -tune-$(CONFIG_CPU_V6) =-mtune=arm1136j-s -tune-$(CONFIG_CPU_V6K) =-mtune=arm1136j-s - -# Evaluate tune cc-option calls now -tune-y := $(tune-y) +tune-$(CONFIG_CPU_ARM7TDMI) :=-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM720T) :=-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM740T) :=-mtune=arm7tdmi +tune-$(CONFIG_CPU_ARM9TDMI) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM940T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM946E) :=-mtune=arm9e +tune-$(CONFIG_CPU_ARM920T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM922T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM925T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_ARM926T) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_FA526) :=-mtune=arm9tdmi +tune-$(CONFIG_CPU_SA110) :=-mtune=strongarm110 +tune-$(CONFIG_CPU_SA1100) :=-mtune=strongarm1100 +tune-$(CONFIG_CPU_XSCALE) :=-mtune=xscale +tune-$(CONFIG_CPU_XSC3) :=-mtune=xscale +tune-$(CONFIG_CPU_FEROCEON) :=-mtune=xscale +tune-$(CONFIG_CPU_V6) :=-mtune=arm1136j-s +tune-$(CONFIG_CPU_V6K) :=-mtune=arm1136j-s ifeq ($(CONFIG_AEABI),y) CFLAGS_ABI :=-mabi=aapcs-linux -mfpu=vfp -- cgit v1.2.3 From a2faac39866d0313f3ca59c36a9f4e077faf4f53 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 24 Oct 2022 20:44:41 +0100 Subject: ARM: 9263/1: use .arch directives instead of assembler command line flags Similar to commit a6c30873ee4a ("ARM: 8989/1: use .fpu assembler directives instead of assembler arguments"). GCC and GNU binutils support setting the "sub arch" via -march=, -Wa,-march, target function attribute, and .arch assembler directive. Clang was missing support for -Wa,-march=, but this was implemented in clang-13. The behavior of both GCC and Clang is to prefer -Wa,-march= over -march= for assembler and assembler-with-cpp sources, but Clang will warn about the -march= being unused. clang: warning: argument unused during compilation: '-march=armv6k' [-Wunused-command-line-argument] Since most assembler is non-conditionally assembled with one sub arch (modulo arch/arm/delay-loop.S which conditionally is assembled as armv4 based on CONFIG_ARCH_RPC, and arch/arm/mach-at91/pm-suspend.S which is conditionally assembled as armv7-a based on CONFIG_CPU_V7), prefer the .arch assembler directive. Add a few more instances found in compile testing as found by Arnd and Nathan. Link: https://github.com/llvm/llvm-project/commit/1d51c699b9e2ebc5bcfdbe85c74cc871426333d4 Link: https://bugs.llvm.org/show_bug.cgi?id=48894 Link: https://github.com/ClangBuiltLinux/linux/issues/1195 Link: https://github.com/ClangBuiltLinux/linux/issues/1315 Suggested-by: Arnd Bergmann Suggested-by: Nathan Chancellor Signed-off-by: Arnd Bergmann Tested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Russell King (Oracle) --- arch/arm/boot/compressed/Makefile | 1 - arch/arm/common/Makefile | 2 -- arch/arm/common/mcpm_head.S | 2 ++ arch/arm/common/vlock.S | 2 ++ arch/arm/kernel/Makefile | 2 -- arch/arm/kernel/hyp-stub.S | 2 ++ arch/arm/kernel/swp_emulate.c | 1 + arch/arm/lib/Makefile | 4 ---- arch/arm/lib/delay-loop.S | 4 ++++ arch/arm/mach-at91/Makefile | 3 --- arch/arm/mach-at91/pm_suspend.S | 4 ++++ arch/arm/mach-imx/Makefile | 3 --- arch/arm/mach-imx/headsmp.S | 2 ++ arch/arm/mach-imx/resume-imx6.S | 2 ++ arch/arm/mach-imx/suspend-imx6.S | 2 ++ arch/arm/mach-mvebu/Makefile | 3 --- arch/arm/mach-mvebu/coherency_ll.S | 1 + arch/arm/mach-mvebu/pmsu.c | 1 + arch/arm/mach-npcm/Makefile | 2 -- arch/arm/mach-npcm/headsmp.S | 2 ++ arch/arm/mach-tegra/Makefile | 2 -- arch/arm/mach-tegra/reset-handler.S | 2 ++ arch/arm/mach-tegra/sleep-tegra20.S | 2 ++ arch/arm/mach-tegra/sleep-tegra30.S | 2 ++ arch/arm/mach-tegra/sleep.S | 2 ++ arch/arm/mm/Makefile | 15 --------------- arch/arm/mm/abort-ev6.S | 1 + arch/arm/mm/abort-ev7.S | 1 + arch/arm/mm/cache-v6.S | 2 ++ arch/arm/mm/cache-v7.S | 2 ++ arch/arm/mm/cache-v7m.S | 2 ++ arch/arm/mm/copypage-feroceon.c | 1 + arch/arm/mm/proc-v6.S | 2 ++ arch/arm/mm/proc-v7-2level.S | 2 ++ arch/arm/mm/proc-v7.S | 2 ++ arch/arm/mm/tlb-v6.S | 2 ++ arch/arm/mm/tlb-v7.S | 2 ++ drivers/memory/Makefile | 2 -- drivers/memory/ti-emif-sram-pm.S | 1 + drivers/soc/bcm/brcmstb/pm/Makefile | 1 - drivers/soc/bcm/brcmstb/pm/s2-arm.S | 1 + 41 files changed, 54 insertions(+), 40 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 2cc2af13779e..8c63f0a26f56 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -164,4 +164,3 @@ $(obj)/piggy_data: $(obj)/../Image FORCE $(obj)/piggy.o: $(obj)/piggy_data CFLAGS_font.o := -Dstatic= -AFLAGS_hyp-stub.o := -Wa,-march=armv7-a diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 7bae8cbaafe7..9733381074e0 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -13,7 +13,5 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_CPU_V7) += secure_cntvoff.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o CFLAGS_REMOVE_mcpm_entry.o = -pg -AFLAGS_mcpm_head.o := -march=armv7-a -AFLAGS_vlock.o := -march=armv7-a obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 291d969bc719..299495c43dfd 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -15,6 +15,8 @@ #include "vlock.h" +.arch armv7-a + .if MCPM_SYNC_CLUSTER_CPUS .error "cpus must be the first member of struct mcpm_sync_struct" .endif diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S index f1c7fd44f1b1..1fa09c4697ed 100644 --- a/arch/arm/common/vlock.S +++ b/arch/arm/common/vlock.S @@ -12,6 +12,8 @@ #include #include "vlock.h" +.arch armv7-a + /* Select different code if voting flags can fit in a single word. */ #if VLOCK_VOTING_SIZE > 4 #define FEW(x...) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 553866751e1a..e64ed3a82f70 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -71,7 +71,6 @@ obj-$(CONFIG_HAVE_TCM) += tcm.o obj-$(CONFIG_OF) += devtree.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o -CFLAGS_swp_emulate.o := -Wa,-march=armv7-a obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_XSCALE) += xscale-cp0.o @@ -100,7 +99,6 @@ CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240) obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o -AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a ifeq ($(CONFIG_ARM_PSCI),y) obj-$(CONFIG_SMP) += psci_smp.o endif diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index b699b22a4db1..3a506b9095a5 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -9,6 +9,8 @@ #include #include +.arch armv7-a + #ifndef ZIMAGE /* * For the kernel proper, we need to find out the CPU boot mode long after diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index b74bfcf94fb1..fdce83c95acb 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -34,6 +34,7 @@ */ #define __user_swpX_asm(data, addr, res, temp, B) \ __asm__ __volatile__( \ + ".arch armv7-a\n" \ "0: ldrex"B" %2, [%3]\n" \ "1: strex"B" %0, %1, [%3]\n" \ " cmp %0, #0\n" \ diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 6d2ba454f25b..42fb75c06647 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -36,10 +36,6 @@ else lib-y += io-readsw-armv4.o io-writesw-armv4.o endif -ifeq ($(CONFIG_ARCH_RPC),y) - AFLAGS_delay-loop.o += -march=armv4 -endif - $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S index 3ccade0f8130..3ac05177d097 100644 --- a/arch/arm/lib/delay-loop.S +++ b/arch/arm/lib/delay-loop.S @@ -8,6 +8,10 @@ #include #include +#ifdef CONFIG_ARCH_RPC + .arch armv4 +#endif + .text .LC0: .word loops_per_jiffy diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 0dcc37180588..794bd12ab0a8 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -14,9 +14,6 @@ obj-$(CONFIG_SOC_SAMV7) += samv7.o # Power Management obj-$(CONFIG_ATMEL_PM) += pm.o pm_suspend.o -ifeq ($(CONFIG_CPU_V7),y) -AFLAGS_pm_suspend.o := -march=armv7-a -endif ifeq ($(CONFIG_PM_DEBUG),y) CFLAGS_pm.o += -DDEBUG endif diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index abe4ced33eda..f0aa220d668d 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -12,6 +12,10 @@ #include "pm.h" #include "pm_data-offsets.h" +#ifdef CONFIG_CPU_V7 +.arch armv7-a +#endif + #define SRAMC_SELF_FRESH_ACTIVE 0x01 #define SRAMC_SELF_FRESH_EXIT 0x00 diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 6fb3965b9ae6..5c650bf40e02 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -34,7 +34,6 @@ obj-$(CONFIG_HAVE_IMX_GPC) += gpc.o obj-$(CONFIG_HAVE_IMX_MMDC) += mmdc.o obj-$(CONFIG_HAVE_IMX_SRC) += src.o ifneq ($(CONFIG_SOC_IMX6)$(CONFIG_SOC_IMX7D_CA7)$(CONFIG_SOC_LS1021A),) -AFLAGS_headsmp.o :=-Wa,-march=armv7-a obj-$(CONFIG_SMP) += headsmp.o platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o endif @@ -48,12 +47,10 @@ obj-$(CONFIG_SOC_IMX7D_CM4) += mach-imx7d-cm4.o obj-$(CONFIG_SOC_IMX7ULP) += mach-imx7ulp.o pm-imx7ulp.o ifeq ($(CONFIG_SUSPEND),y) -AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif ifeq ($(CONFIG_ARM_CPU_SUSPEND),y) -AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += resume-imx6.o endif obj-$(CONFIG_SOC_IMX6) += pm-imx6.o diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S index fcba58be8e79..5f9c7b48ae80 100644 --- a/arch/arm/mach-imx/headsmp.S +++ b/arch/arm/mach-imx/headsmp.S @@ -8,6 +8,8 @@ #include #include +.arch armv7-a + diag_reg_offset: .word g_diag_reg - . diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S index 5bd1ba7ef15b..2c0c5c771251 100644 --- a/arch/arm/mach-imx/resume-imx6.S +++ b/arch/arm/mach-imx/resume-imx6.S @@ -9,6 +9,8 @@ #include #include "hardware.h" +.arch armv7-a + /* * The following code must assume it is running from physical address * where absolute virtual addresses to the data section have to be diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index e06f946b75b9..63ccc2d0e920 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -9,6 +9,8 @@ #include #include "hardware.h" +.arch armv7-a + /* * ==================== low level suspend ==================== * diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index cb106899dd7c..9b9fddb69958 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include -AFLAGS_coherency_ll.o := -Wa,-march=armv7-a -CFLAGS_pmsu.o := -march=armv7-a - obj-$(CONFIG_MACH_MVEBU_ANY) += system-controller.o mvebu-soc-id.o ifeq ($(CONFIG_MACH_MVEBU_V7),y) diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index eb81656e32d4..35930e03d9c6 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -20,6 +20,7 @@ #include #include + .arch armv7-a .text /* * Returns the coherency base address in r1 (r0 is untouched), or 0 if diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index af27a7156675..6f366d8c4231 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -291,6 +291,7 @@ int armada_370_xp_pmsu_idle_enter(unsigned long deepidle) /* Test the CR_C bit and set it if it was cleared */ asm volatile( + ".arch armv7-a\n\t" "mrc p15, 0, r0, c1, c0, 0 \n\t" "tst r0, %0 \n\t" "orreq r0, r0, #(1 << 2) \n\t" diff --git a/arch/arm/mach-npcm/Makefile b/arch/arm/mach-npcm/Makefile index 8d61fcd42fb1..ac83e1caf2ee 100644 --- a/arch/arm/mach-npcm/Makefile +++ b/arch/arm/mach-npcm/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -AFLAGS_headsmp.o += -march=armv7-a - obj-$(CONFIG_ARCH_WPCM450) += wpcm450.o obj-$(CONFIG_ARCH_NPCM7XX) += npcm7xx.o obj-$(CONFIG_SMP) += platsmp.o headsmp.o diff --git a/arch/arm/mach-npcm/headsmp.S b/arch/arm/mach-npcm/headsmp.S index c083fe09a07b..84d2b6daaf0b 100644 --- a/arch/arm/mach-npcm/headsmp.S +++ b/arch/arm/mach-npcm/headsmp.S @@ -6,6 +6,8 @@ #include #include +.arch armv7-a + /* * The boot ROM does not start secondary CPUs in SVC mode, so we need to do that * here. diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile index 07572b5373b8..a2bb55bc0081 100644 --- a/arch/arm/mach-tegra/Makefile +++ b/arch/arm/mach-tegra/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -asflags-y += -march=armv7-a - obj-y += io.o obj-y += irq.o obj-y += pm.o diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 06ca44b09381..0ea456264f3e 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -19,6 +19,8 @@ #define PMC_SCRATCH41 0x140 +.arch armv7-a + #ifdef CONFIG_PM_SLEEP /* * tegra_resume diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S index a5a36cce142a..d8cd487a8f63 100644 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ b/arch/arm/mach-tegra/sleep-tegra20.S @@ -47,6 +47,8 @@ #define PLLM_STORE_MASK (1 << 1) #define PLLP_STORE_MASK (1 << 2) +.arch armv7-a + .macro test_pll_state, rd, test_mask ldr \rd, tegra_pll_state tst \rd, #\test_mask diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index 0cc40b6b2ba3..134ea5fe49b2 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -78,6 +78,8 @@ #define PLLX_STORE_MASK (1 << 4) #define PLLM_PMC_STORE_MASK (1 << 5) +.arch armv7-a + .macro emc_device_mask, rd, base ldr \rd, [\base, #EMC_ADR_CFG] tst \rd, #0x1 diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S index 8f88944831c5..945f2c1474f7 100644 --- a/arch/arm/mach-tegra/sleep.S +++ b/arch/arm/mach-tegra/sleep.S @@ -22,6 +22,8 @@ #define CLK_RESET_CCLK_BURST 0x20 #define CLK_RESET_CCLK_DIVIDER 0x24 +.arch armv7-a + #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP) /* * tegra_disable_clean_inv_dcache diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 3510503bc5e6..71b858c9b10c 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -33,9 +33,6 @@ obj-$(CONFIG_CPU_ABRT_EV5TJ) += abort-ev5tj.o obj-$(CONFIG_CPU_ABRT_EV6) += abort-ev6.o obj-$(CONFIG_CPU_ABRT_EV7) += abort-ev7.o -AFLAGS_abort-ev6.o :=-Wa,-march=armv6k -AFLAGS_abort-ev7.o :=-Wa,-march=armv7-a - obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o @@ -49,10 +46,6 @@ obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o obj-$(CONFIG_CPU_CACHE_V7M) += cache-v7m.o -AFLAGS_cache-v6.o :=-Wa,-march=armv6 -AFLAGS_cache-v7.o :=-Wa,-march=armv7-a -AFLAGS_cache-v7m.o :=-Wa,-march=armv7-m - obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o obj-$(CONFIG_CPU_COPY_FEROCEON) += copypage-feroceon.o @@ -62,8 +55,6 @@ obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o -CFLAGS_copypage-feroceon.o := -march=armv5te - obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o @@ -72,9 +63,6 @@ obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o -AFLAGS_tlb-v6.o :=-Wa,-march=armv6 -AFLAGS_tlb-v7.o :=-Wa,-march=armv7-a - obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o obj-$(CONFIG_CPU_ARM740T) += proc-arm740.o @@ -101,9 +89,6 @@ obj-$(CONFIG_CPU_V6K) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o proc-v7-bugs.o obj-$(CONFIG_CPU_V7M) += proc-v7m.o -AFLAGS_proc-v6.o :=-Wa,-march=armv6 -AFLAGS_proc-v7.o :=-Wa,-march=armv7-a - obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index c58bf8b43fea..836dc1299243 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -16,6 +16,7 @@ * abort here if the I-TLB and D-TLB aren't seeing the same * picture. Unfortunately, this does happen. We live with it. */ + .arch armv6k .align 5 ENTRY(v6_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index f81bceacc660..53fb41c24774 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -12,6 +12,7 @@ * * Purpose : obtain information about current aborted instruction. */ + .arch armv7-a .align 5 ENTRY(v7_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index f0f65eb073e4..250c83bf7158 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -19,6 +19,8 @@ #define D_CACHE_LINE_SIZE 32 #define BTB_FLUSH_SIZE 8 +.arch armv6 + /* * v6_flush_icache_all() * diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 7c9499b728c4..127afe2096ba 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -16,6 +16,8 @@ #include "proc-macros.S" +.arch armv7-a + #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND .globl icache_size .data diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index 1bc3a0a50753..eb60b5e5e2ad 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -18,6 +18,8 @@ #include "proc-macros.S" +.arch armv7-m + /* Generic V7M read/write macros for memory mapped cache operations */ .macro v7m_cache_read, rt, reg movw \rt, #:lower16:BASEADDR_V7M_SCB + \reg diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c index 064b19e63571..5fc8ef1e665f 100644 --- a/arch/arm/mm/copypage-feroceon.c +++ b/arch/arm/mm/copypage-feroceon.c @@ -15,6 +15,7 @@ static void feroceon_copy_user_page(void *kto, const void *kfrom) int tmp; asm volatile ("\ +.arch armv5te \n\ 1: ldmia %1!, {r2 - r7, ip, lr} \n\ pld [%1, #0] \n\ pld [%1, #32] \n\ diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index a0618f3e6836..203dff89ab1a 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -32,6 +32,8 @@ #define TTB_FLAGS_SMP TTB_RGN_WBWA|TTB_S #define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S +.arch armv6 + ENTRY(cpu_v6_proc_init) ret lr diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 5db029c8f987..0a3083ad19c2 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -24,6 +24,8 @@ #define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA #define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S +.arch armv7-a + /* * cpu_v7_switch_mm(pgd_phys, tsk) * diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 26d726a08a34..6b4ef9539b68 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -24,6 +24,8 @@ #include "proc-v7-2level.S" #endif +.arch armv7-a + ENTRY(cpu_v7_proc_init) ret lr ENDPROC(cpu_v7_proc_init) diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index 74f4b383afe3..1d91e49b2c2d 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -17,6 +17,8 @@ #define HARVARD_TLB +.arch armv6 + /* * v6wbi_flush_user_tlb_range(start, end, vma) * diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 87bf4ab17721..35fd6d4f0d03 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -16,6 +16,8 @@ #include #include "proc-macros.S" +.arch armv7-a + /* * v7wbi_flush_user_tlb_range(start, end, vma) * diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile index bc7663ed1c25..d1388a748872 100644 --- a/drivers/memory/Makefile +++ b/drivers/memory/Makefile @@ -32,8 +32,6 @@ obj-$(CONFIG_FPGA_DFL_EMIF) += dfl-emif.o ti-emif-sram-objs := ti-emif-pm.o ti-emif-sram-pm.o -AFLAGS_ti-emif-sram-pm.o :=-Wa,-march=armv7-a - $(obj)/ti-emif-sram-pm.o: $(obj)/ti-emif-asm-offsets.h $(obj)/ti-emif-asm-offsets.h: $(obj)/emif-asm-offsets.s FORCE diff --git a/drivers/memory/ti-emif-sram-pm.S b/drivers/memory/ti-emif-sram-pm.S index 9bcac35c3304..d60a8cfd63f3 100644 --- a/drivers/memory/ti-emif-sram-pm.S +++ b/drivers/memory/ti-emif-sram-pm.S @@ -28,6 +28,7 @@ .arm .align 3 + .arch armv7-a ENTRY(ti_emif_sram) diff --git a/drivers/soc/bcm/brcmstb/pm/Makefile b/drivers/soc/bcm/brcmstb/pm/Makefile index 8e10abb14f8b..f849cfa69446 100644 --- a/drivers/soc/bcm/brcmstb/pm/Makefile +++ b/drivers/soc/bcm/brcmstb/pm/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ARM) += s2-arm.o pm-arm.o -AFLAGS_s2-arm.o := -march=armv7-a obj-$(CONFIG_BMIPS_GENERIC) += s2-mips.o s3-mips.o pm-mips.o diff --git a/drivers/soc/bcm/brcmstb/pm/s2-arm.S b/drivers/soc/bcm/brcmstb/pm/s2-arm.S index 5f0c4a8ae9df..0d693795de27 100644 --- a/drivers/soc/bcm/brcmstb/pm/s2-arm.S +++ b/drivers/soc/bcm/brcmstb/pm/s2-arm.S @@ -8,6 +8,7 @@ #include "pm.h" + .arch armv7-a .text .align 3 -- cgit v1.2.3 From 26b12e084bce78f339bf9069ff25e0128313d9bc Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 24 Oct 2022 20:45:17 +0100 Subject: ARM: 9264/1: only use -mtp=cp15 for the compiler Avoids an error from the assembler for CONFIG_THUMB2 kernels: clang-15: error: hardware TLS register is not supported for the thumbv4t sub-architecture This flag only makes sense to pass to the compiler, not the assembler. Perhaps CFLAGS_ABI can be renamed to CPPFLAGS_ABI to reflect that they will be passed to both the compiler and assembler for sources that require pre-processing. Reviewed-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Russell King (Oracle) --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 34bf50402f82..5fdd6b9bc7c1 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -111,7 +111,7 @@ CFLAGS_ABI += -meabi gnu endif ifeq ($(CONFIG_CURRENT_POINTER_IN_TPIDRURO),y) -CFLAGS_ABI += -mtp=cp15 +KBUILD_CFLAGS += -mtp=cp15 endif # Accept old syntax despite ".syntax unified" -- cgit v1.2.3 From 1d2e9b67b001f8c0d10138797b9df4779609c03c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 24 Oct 2022 20:46:05 +0100 Subject: ARM: 9265/1: pass -march= only to compiler When both -march= and -Wa,-march= are specified for assembler or assembler-with-cpp sources, GCC and Clang will prefer the -Wa,-march= value but Clang will warn that -march= is unused. warning: argument unused during compilation: '-march=armv6k' [-Wunused-command-line-argument] This is the top group of warnings we observe when using clang to assemble the kernel via `ARCH=arm make LLVM=1`. Split the arch-y make variable into two, so that -march= flags only get passed to the compiler, not the assembler. -D flags are added to KBUILD_CPPFLAGS which is used for both C and assembler-with-cpp sources. Clang is trying to warn that it doesn't support different values for -march= and -Wa,-march= (like GCC does, but the kernel doesn't need this) though the value of the preprocessor define __thumb2__ is based on -march=. Make sure to re-set __thumb2__ via -D flag for assembler sources now that we're no longer passing -march= to the assembler. Set it to a different value than the preprocessor would for -march= in case -march= gets accidentally re-added to KBUILD_AFLAGS in the future. Thanks to Ard and Nathan for this suggestion. Link: https://github.com/ClangBuiltLinux/linux/issues/1315 Link: https://github.com/ClangBuiltLinux/linux/issues/1587 Link: https://github.com/llvm/llvm-project/issues/55656 Suggested-by: Ard Biesheuvel Suggested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Russell King (Oracle) --- arch/arm/Makefile | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 5fdd6b9bc7c1..357f0d9b8607 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -60,21 +60,34 @@ endif KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) # This selects which instruction set is used. +arch-$(CONFIG_CPU_32v7M) :=-march=armv7-m +arch-$(CONFIG_CPU_32v7) :=-march=armv7-a +arch-$(CONFIG_CPU_32v6) :=-march=armv6 +# Only override the compiler option if ARMv6. The ARMv6K extensions are +# always available in ARMv7 +ifeq ($(CONFIG_CPU_32v6),y) +arch-$(CONFIG_CPU_32v6K) :=-march=armv6k +endif +arch-$(CONFIG_CPU_32v5) :=-march=armv5te +arch-$(CONFIG_CPU_32v4T) :=-march=armv4t +arch-$(CONFIG_CPU_32v4) :=-march=armv4 +arch-$(CONFIG_CPU_32v3) :=-march=armv3m + # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 -march=armv7-m -arch-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 -march=armv7-a -arch-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 -march=armv6 -# Only override the compiler opt:ion if ARMv6. The ARMv6K extensions are +cpp-$(CONFIG_CPU_32v7M) :=-D__LINUX_ARM_ARCH__=7 +cpp-$(CONFIG_CPU_32v7) :=-D__LINUX_ARM_ARCH__=7 +cpp-$(CONFIG_CPU_32v6) :=-D__LINUX_ARM_ARCH__=6 +# Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 -march=armv6k +cpp-$(CONFIG_CPU_32v6K) :=-D__LINUX_ARM_ARCH__=6 endif -arch-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 -march=armv5te -arch-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 -march=armv4t -arch-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 -march=armv4 -arch-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 -march=armv3m +cpp-$(CONFIG_CPU_32v5) :=-D__LINUX_ARM_ARCH__=5 +cpp-$(CONFIG_CPU_32v4T) :=-D__LINUX_ARM_ARCH__=4 +cpp-$(CONFIG_CPU_32v4) :=-D__LINUX_ARM_ARCH__=4 +cpp-$(CONFIG_CPU_32v3) :=-D__LINUX_ARM_ARCH__=3 # This selects how we optimise for the processor. tune-$(CONFIG_CPU_ARM7TDMI) :=-mtune=arm7tdmi @@ -119,15 +132,16 @@ AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) ifeq ($(CONFIG_THUMB2_KERNEL),y) CFLAGS_ISA :=-mthumb -Wa,-mimplicit-it=always $(AFLAGS_NOWARN) -AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb +AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb -D__thumb2__=2 else CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) endif # Need -Uarm for gcc < 3.x +KBUILD_CPPFLAGS +=$(cpp-y) KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm -KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float +KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) -Wa,$(arch-y) $(tune-y) -include asm/unified.h -msoft-float CHECKFLAGS += -D__arm__ -- cgit v1.2.3 From 70ccc7c0667ba8a39dab274b3836b063a6b4ecf9 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Tue, 18 Oct 2022 13:57:02 +0100 Subject: ARM: 9258/1: stacktrace: Make stack walk callback consistent with generic code As with the generic arch_stack_walk() code the ARM stack walk code takes a callback that is called per stack frame. Currently the ARM code always passes a struct stackframe to the callback and the generic code just passes the pc, however none of the users ever reference anything in the struct other than the pc value. The ARM code also uses a return type of int while the generic code uses a return type of bool though in both cases the return value is a boolean value and the sense is inverted between the two. In order to reduce code duplication when ARM is converted to use arch_stack_walk() change the signature and return sense of the ARM specific callback to match that of the generic code. Signed-off-by: Li Huafei Reviewed-by: Mark Brown Reviewed-by: Linus Waleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/stacktrace.h | 2 +- arch/arm/kernel/perf_callchain.c | 9 ++++----- arch/arm/kernel/return_address.c | 8 ++++---- arch/arm/kernel/stacktrace.c | 13 ++++++------- 4 files changed, 15 insertions(+), 17 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 36b2ff44fcbb..360f0d2406bf 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -44,7 +44,7 @@ void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) extern int unwind_frame(struct stackframe *frame); extern void walk_stackframe(struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data); + bool (*fn)(void *, unsigned long), void *data); extern void dump_mem(const char *lvl, const char *str, unsigned long bottom, unsigned long top); extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index bc6b246ab55e..7147edbe56c6 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -81,13 +81,12 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs * whist unwinding the stackframe and is like a subroutine return so we use * the PC. */ -static int -callchain_trace(struct stackframe *fr, - void *data) +static bool +callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, fr->pc); - return 0; + perf_callchain_store(entry, pc); + return true; } void diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 38f1ea9c724d..ac15db66df4c 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -16,17 +16,17 @@ struct return_address_data { void *addr; }; -static int save_return_addr(struct stackframe *frame, void *d) +static bool save_return_addr(void *d, unsigned long pc) { struct return_address_data *data = d; if (!data->level) { - data->addr = (void *)frame->pc; + data->addr = (void *)pc; - return 1; + return false; } else { --data->level; - return 0; + return true; } } diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 85443b5d1922..d05968bc7812 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -127,12 +127,12 @@ int notrace unwind_frame(struct stackframe *frame) #endif void notrace walk_stackframe(struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data) + bool (*fn)(void *, unsigned long), void *data) { while (1) { int ret; - if (fn(frame, data)) + if (!fn(data, frame->pc)) break; ret = unwind_frame(frame); if (ret < 0) @@ -148,21 +148,20 @@ struct stack_trace_data { unsigned int skip; }; -static int save_trace(struct stackframe *frame, void *d) +static bool save_trace(void *d, unsigned long addr) { struct stack_trace_data *data = d; struct stack_trace *trace = data->trace; - unsigned long addr = frame->pc; if (data->no_sched_functions && in_sched_functions(addr)) - return 0; + return true; if (data->skip) { data->skip--; - return 0; + return true; } trace->entries[trace->nr_entries++] = addr; - return trace->nr_entries >= trace->max_entries; + return trace->nr_entries < trace->max_entries; } /* This must be noinline to so that our skip calculation works correctly */ -- cgit v1.2.3 From 9fbed16c3f4f2bebff610ff1ebb756785dfde0be Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Tue, 18 Oct 2022 13:59:02 +0100 Subject: ARM: 9259/1: stacktrace: Convert stacktrace to generic ARCH_STACKWALK Historically architectures have had duplicated code in their stack trace implementations for filtering what gets traced. In order to avoid this duplication some generic code has been provided using a new interface arch_stack_walk(), enabled by selecting ARCH_STACKWALK in Kconfig, which factors all this out into the generic stack trace code. Convert ARM to use this common infrastructure. When initializing the stack frame of the current task, arm64 uses __builtin_frame_address(1) to initialize the frame pointer, skipping arch_stack_walk(), see the commit c607ab4f916d ("arm64: stacktrace: don't trace arch_stack_walk()"). Since __builtin_frame_address(1) does not work on ARM, unwind_frame() is used to unwind the stack one layer forward before calling walk_stackframe(). Signed-off-by: Li Huafei Reviewed-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 1 + arch/arm/kernel/stacktrace.c | 114 ++++++++++++------------------------------- 2 files changed, 33 insertions(+), 82 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c90aa58eab7f..b3427536731d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -17,6 +17,7 @@ config ARM select ARCH_HAS_PTE_SPECIAL if ARM_LPAE select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SET_MEMORY + select ARCH_STACKWALK select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU select ARCH_HAS_SYNC_DMA_FOR_DEVICE diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index d05968bc7812..620aa82e3bdd 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -142,40 +142,32 @@ void notrace walk_stackframe(struct stackframe *frame, EXPORT_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE -struct stack_trace_data { - struct stack_trace *trace; - unsigned int no_sched_functions; - unsigned int skip; -}; - -static bool save_trace(void *d, unsigned long addr) +static void start_stack_trace(struct stackframe *frame, struct task_struct *task, + unsigned long fp, unsigned long sp, + unsigned long lr, unsigned long pc) { - struct stack_trace_data *data = d; - struct stack_trace *trace = data->trace; - - if (data->no_sched_functions && in_sched_functions(addr)) - return true; - if (data->skip) { - data->skip--; - return true; - } - - trace->entries[trace->nr_entries++] = addr; - return trace->nr_entries < trace->max_entries; + frame->fp = fp; + frame->sp = sp; + frame->lr = lr; + frame->pc = pc; +#ifdef CONFIG_KRETPROBES + frame->kr_cur = NULL; + frame->tsk = task; +#endif +#ifdef CONFIG_UNWINDER_FRAME_POINTER + frame->ex_frame = in_entry_text(frame->pc); +#endif } -/* This must be noinline to so that our skip calculation works correctly */ -static noinline void __save_stack_trace(struct task_struct *tsk, - struct stack_trace *trace, unsigned int nosched) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { - struct stack_trace_data data; struct stackframe frame; - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = nosched; - - if (tsk != current) { + if (regs) { + start_stack_trace(&frame, NULL, regs->ARM_fp, regs->ARM_sp, + regs->ARM_lr, regs->ARM_pc); + } else if (task != current) { #ifdef CONFIG_SMP /* * What guarantees do we have here that 'tsk' is not @@ -184,64 +176,22 @@ static noinline void __save_stack_trace(struct task_struct *tsk, */ return; #else - frame.fp = thread_saved_fp(tsk); - frame.sp = thread_saved_sp(tsk); - frame.lr = 0; /* recovered from the stack */ - frame.pc = thread_saved_pc(tsk); + start_stack_trace(&frame, task, thread_saved_fp(task), + thread_saved_sp(task), 0, + thread_saved_pc(task)); #endif } else { - /* We don't want this function nor the caller */ - data.skip += 2; - frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_stack_pointer; - frame.lr = (unsigned long)__builtin_return_address(0); here: - frame.pc = (unsigned long)&&here; + start_stack_trace(&frame, task, + (unsigned long)__builtin_frame_address(0), + current_stack_pointer, + (unsigned long)__builtin_return_address(0), + (unsigned long)&&here); + /* skip this function */ + if (unwind_frame(&frame)) + return; } -#ifdef CONFIG_KRETPROBES - frame.kr_cur = NULL; - frame.tsk = tsk; -#endif -#ifdef CONFIG_UNWINDER_FRAME_POINTER - frame.ex_frame = false; -#endif - walk_stackframe(&frame, save_trace, &data); -} - -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - struct stack_trace_data data; - struct stackframe frame; - - data.trace = trace; - data.skip = trace->skip; - data.no_sched_functions = 0; - - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; - frame.pc = regs->ARM_pc; -#ifdef CONFIG_KRETPROBES - frame.kr_cur = NULL; - frame.tsk = current; -#endif -#ifdef CONFIG_UNWINDER_FRAME_POINTER - frame.ex_frame = in_entry_text(frame.pc); -#endif - - walk_stackframe(&frame, save_trace, &data); -} - -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - __save_stack_trace(tsk, trace, 1); -} -EXPORT_SYMBOL(save_stack_trace_tsk); - -void save_stack_trace(struct stack_trace *trace) -{ - __save_stack_trace(current, trace, 0); + walk_stackframe(&frame, consume_entry, cookie); } -EXPORT_SYMBOL_GPL(save_stack_trace); #endif -- cgit v1.2.3 From 7e0093870e6c550139a27d1780f54f1e8acc4978 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 29 Jul 2022 16:12:25 +0100 Subject: ARM: findbit: document ARMv5 bit offset calculation Document the ARMv5 bit offset calculation code. Signed-off-by: Russell King (Oracle) --- arch/arm/lib/findbit.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 7fd3600db8ef..4c584bc4704b 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -172,10 +172,10 @@ ENDPROC(_find_next_bit_be) .L_found: #if __LINUX_ARM_ARCH__ >= 5 rsb r0, r3, #0 - and r3, r3, r0 - clz r3, r3 - rsb r3, r3, #31 - add r0, r2, r3 + and r3, r3, r0 @ mask out lowest bit set + clz r3, r3 @ count high zero bits + rsb r3, r3, #31 @ offset of first set bit + add r0, r2, r3 @ add offset of first set bit #else tst r3, #0x0f addeq r2, r2, #4 -- cgit v1.2.3 From bceab1431e0789794cbcc8bb71d500a74c8213f2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 29 Jul 2022 16:10:49 +0100 Subject: ARM: findbit: provide more efficient ARMv7 implementation Provide a more efficient ARMv7 implementation to determine the first set bit in the supplied value. Signed-off-by: Russell King (Oracle) --- arch/arm/lib/findbit.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 4c584bc4704b..256e095d490b 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -170,7 +170,11 @@ ENDPROC(_find_next_bit_be) * One or more bits in the LSB of r3 are assumed to be set. */ .L_found: -#if __LINUX_ARM_ARCH__ >= 5 +#if __LINUX_ARM_ARCH__ >= 7 + rbit r3, r3 @ reverse bits + clz r3, r3 @ count high zero bits + add r0, r2, r3 @ add offset of first set bit +#elif __LINUX_ARM_ARCH__ >= 5 rsb r0, r3, #0 and r3, r3, r0 @ mask out lowest bit set clz r3, r3 @ count high zero bits -- cgit v1.2.3 From 2953a3e187e054a4cfba4190a2037dc16b327372 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 11 Aug 2022 15:53:41 +0100 Subject: ARM: findbit: convert to macros Since the pairs of _find_first and _find_next functions are pretty similar, use macros to generate this code. This commit does not change the generated code. Signed-off-by: Russell King (Oracle) --- arch/arm/lib/findbit.S | 158 +++++++++++++------------------------------------ 1 file changed, 42 insertions(+), 116 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 256e095d490b..8280f66d38a5 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -14,155 +14,81 @@ #include .text -/* - * Purpose : Find a 'zero' bit - * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); - */ -ENTRY(_find_first_zero_bit_le) - teq r1, #0 + .macro find_first, endian, set, name +ENTRY(_find_first_\name\()bit_\endian) + teq r1, #0 beq 3f mov r2, #0 1: + .ifc \endian, be + eor r3, r2, #0x18 + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + .else ARM( ldrb r3, [r0, r2, lsr #3] ) THUMB( lsr r3, r2, #3 ) THUMB( ldrb r3, [r0, r3] ) + .endif + .ifeq \set eors r3, r3, #0xff @ invert bits + .else + movs r3, r3 + .endif bne .L_found @ any now set - found zero bit add r2, r2, #8 @ next bit pointer 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits ret lr -ENDPROC(_find_first_zero_bit_le) +ENDPROC(_find_first_\name\()bit_\endian) + .endm -/* - * Purpose : Find next 'zero' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_zero_bit_le) + .macro find_next, endian, set, name +ENTRY(_find_next_\name\()bit_\endian) cmp r2, r1 bhs 3b ands ip, r2, #7 beq 1b @ If new byte, goto old routine + .ifc \endian, be + eor r3, r2, #0x18 + ARM( ldrb r3, [r0, r3, lsr #3] ) + THUMB( lsr r3, #3 ) + THUMB( ldrb r3, [r0, r3] ) + .else ARM( ldrb r3, [r0, r2, lsr #3] ) THUMB( lsr r3, r2, #3 ) THUMB( ldrb r3, [r0, r3] ) + .endif + .ifeq \set eor r3, r3, #0xff @ now looking for a 1 bit + .endif movs r3, r3, lsr ip @ shift off unused bits bne .L_found orr r2, r2, #7 @ if zero, then no bits here add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_le) +ENDPROC(_find_next_\name\()bit_\endian) + .endm -/* - * Purpose : Find a 'one' bit - * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); - */ -ENTRY(_find_first_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 -1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_bit_le) + .macro find_bit, endian, set, name + find_first \endian, \set, \name + find_next \endian, \set, \name + .endm -/* - * Purpose : Find next 'one' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_bit_le) - cmp r2, r1 - bhs 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_le) +/* _find_first_zero_bit_le and _find_next_zero_bit_le */ + find_bit le, 0, zero_ -#ifdef __ARMEB__ +/* _find_first_bit_le and _find_next_bit_le */ + find_bit le, 1 -ENTRY(_find_first_zero_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_zero_bit_be) +#ifdef __ARMEB__ -ENTRY(_find_next_zero_bit_be) - cmp r2, r1 - bhs 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_be) +/* _find_first_zero_bit_be and _find_next_zero_bit_be */ + find_bit be, 0, zero_ -ENTRY(_find_first_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_bit_be) - -ENTRY(_find_next_bit_be) - cmp r2, r1 - bhs 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_be) +/* _find_first_bit_be and _find_next_bit_be */ + find_bit be, 1 #endif -- cgit v1.2.3 From 2511d032f02e6426b0bd87d225b4b322a7154d15 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 27 Jul 2022 00:27:38 +0100 Subject: ARM: findbit: operate by words Convert the implementations to operate on words rather than bytes which makes bitmap searching faster. Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/assembler.h | 6 ++++ arch/arm/lib/findbit.S | 78 ++++++++++++++++++++++------------------ 2 files changed, 50 insertions(+), 34 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 90fbe4a3f9c8..28e18f79c300 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -761,6 +761,12 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .endif .endm + .if __LINUX_ARM_ARCH__ < 6 + .set .Lrev_l_uses_tmp, 1 + .else + .set .Lrev_l_uses_tmp, 0 + .endif + /* * bl_r - branch and link to register * diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 8280f66d38a5..6ec584d16d46 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -14,32 +14,32 @@ #include .text +#ifdef __ARMEB__ +#define SWAB_ENDIAN le +#else +#define SWAB_ENDIAN be +#endif + .macro find_first, endian, set, name ENTRY(_find_first_\name\()bit_\endian) teq r1, #0 beq 3f mov r2, #0 -1: - .ifc \endian, be - eor r3, r2, #0x18 - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) +1: ldr r3, [r0], #4 + .ifeq \set + mvns r3, r3 @ invert/test bits .else - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) + movs r3, r3 @ test bits .endif - .ifeq \set - eors r3, r3, #0xff @ invert bits + .ifc \endian, SWAB_ENDIAN + bne .L_found_swab .else - movs r3, r3 + bne .L_found @ found the bit? .endif - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer + add r2, r2, #32 @ next index 2: cmp r2, r1 @ any more? blo 1b -3: mov r0, r1 @ no free bits +3: mov r0, r1 @ no more bits ret lr ENDPROC(_find_first_\name\()bit_\endian) .endm @@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian) ENTRY(_find_next_\name\()bit_\endian) cmp r2, r1 bhs 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - .ifc \endian, be - eor r3, r2, #0x18 - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - .else - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) + mov ip, r2, lsr #5 @ word index + add r0, r0, ip, lsl #2 + ands ip, r2, #31 @ bit position + beq 1b + ldr r3, [r0], #4 + .ifeq \set + mvn r3, r3 @ invert bits + .endif + .ifc \endian, SWAB_ENDIAN + rev_l r3, ip + .if .Lrev_l_uses_tmp + @ we need to recompute ip because rev_l will have overwritten + @ it. + and ip, r2, #31 @ bit position .endif - .ifeq \set - eor r3, r3, #0xff @ now looking for a 1 bit .endif movs r3, r3, lsr ip @ shift off unused bits bne .L_found - orr r2, r2, #7 @ if zero, then no bits here + orr r2, r2, #31 @ no zero bits add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit ENDPROC(_find_next_\name\()bit_\endian) @@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian) /* * One or more bits in the LSB of r3 are assumed to be set. */ +.L_found_swab: + rev_l r3, ip .L_found: #if __LINUX_ARM_ARCH__ >= 7 rbit r3, r3 @ reverse bits @@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian) rsb r3, r3, #31 @ offset of first set bit add r0, r2, r3 @ add offset of first set bit #else - tst r3, #0x0f + mov ip, #~0 + tst r3, ip, lsr #16 @ test bits 0-15 + addeq r2, r2, #16 + moveq r3, r3, lsr #16 + tst r3, #0x00ff + addeq r2, r2, #8 + moveq r3, r3, lsr #8 + tst r3, #0x000f addeq r2, r2, #4 - movne r3, r3, lsl #4 - tst r3, #0x30 + moveq r3, r3, lsr #4 + tst r3, #0x0003 addeq r2, r2, #2 - movne r3, r3, lsl #2 - tst r3, #0x40 + moveq r3, r3, lsr #2 + tst r3, #0x0001 addeq r2, r2, #1 mov r0, r2 #endif -- cgit v1.2.3 From f424f2c18432f8a2c35ebafb23dd004148bce149 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 11 Aug 2022 17:03:29 +0100 Subject: ARM: findbit: add unwinder information Add unwinder information so oops in the findbit functions can create a proper backtrace. Signed-off-by: Russell King (Oracle) --- arch/arm/lib/findbit.S | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 6ec584d16d46..b7ac2d3c0748 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -12,6 +12,7 @@ */ #include #include +#include .text #ifdef __ARMEB__ @@ -22,6 +23,7 @@ .macro find_first, endian, set, name ENTRY(_find_first_\name\()bit_\endian) + UNWIND( .fnstart) teq r1, #0 beq 3f mov r2, #0 @@ -41,11 +43,13 @@ ENTRY(_find_first_\name\()bit_\endian) blo 1b 3: mov r0, r1 @ no more bits ret lr + UNWIND( .fnend) ENDPROC(_find_first_\name\()bit_\endian) .endm .macro find_next, endian, set, name ENTRY(_find_next_\name\()bit_\endian) + UNWIND( .fnstart) cmp r2, r1 bhs 3b mov ip, r2, lsr #5 @ word index @@ -69,6 +73,7 @@ ENTRY(_find_next_\name\()bit_\endian) orr r2, r2, #31 @ no zero bits add r2, r2, #1 @ align bit pointer b 2b @ loop for next bit + UNWIND( .fnend) ENDPROC(_find_next_\name\()bit_\endian) .endm @@ -97,6 +102,7 @@ ENDPROC(_find_next_\name\()bit_\endian) * One or more bits in the LSB of r3 are assumed to be set. */ .L_found_swab: + UNWIND( .fnstart) rev_l r3, ip .L_found: #if __LINUX_ARM_ARCH__ >= 7 @@ -130,4 +136,4 @@ ENDPROC(_find_next_\name\()bit_\endian) cmp r1, r0 @ Clamp to maxbit movlo r0, r1 ret lr - + UNWIND( .fnend) -- cgit v1.2.3 From 74c344e6f153dd9ae97c99ad751723e4030d4af9 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 06:16:12 +0100 Subject: ARM: 9267/1: Define Armv8 registers in AArch32 state AArch32 Instruction Set Attribute Register 6 (ID_ISAR6_EL1) and AArch32 Processor Feature Register 2 (ID_PFR2_EL1) identifies some new features for the Armv8 architecture. This registers will be utilized to add hwcaps for those cpu features. These registers are marked as reserved for Armv7 and should be a RAZ. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 775cac3c02bb..0163c3e78a67 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -25,6 +25,8 @@ #define CPUID_EXT_ISAR3 0x6c #define CPUID_EXT_ISAR4 0x70 #define CPUID_EXT_ISAR5 0x74 +#define CPUID_EXT_ISAR6 0x7c +#define CPUID_EXT_PFR2 0x90 #else #define CPUID_EXT_PFR0 "c1, 0" #define CPUID_EXT_PFR1 "c1, 1" @@ -40,6 +42,8 @@ #define CPUID_EXT_ISAR3 "c2, 3" #define CPUID_EXT_ISAR4 "c2, 4" #define CPUID_EXT_ISAR5 "c2, 5" +#define CPUID_EXT_ISAR6 "c2, 7" +#define CPUID_EXT_PFR2 "c3, 4" #endif #define MPIDR_SMP_BITMASK (0x3 << 30) -- cgit v1.2.3 From c00a19c8b143db31d660ee965e6a6f782ef090ff Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 06:28:22 +0100 Subject: ARM: 9268/1: vfp: Add hwcap FPHP and ASIMDHP for FEAT_FP16 Floating point half-precision (FPHP) and Advanced SIMD half-precision (ASIMDHP) are VFP features (FEAT_FP16) represented by MVFR1 identification register. These capabilities can optionally exist with VFPv3 and mandatory with VFPv4. Both these new features exist for Armv8 architecture in AArch32 state. These hwcaps may be useful for the userspace to add conditional check before trying to use FEAT_FP16 feature specific instructions. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/vfp.h | 6 ++++++ arch/arm/include/uapi/asm/hwcap.h | 2 ++ arch/arm/kernel/setup.c | 2 ++ arch/arm/vfp/vfpmodule.c | 4 ++++ 4 files changed, 14 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h index 19928bfb4f9c..157ea3426158 100644 --- a/arch/arm/include/asm/vfp.h +++ b/arch/arm/include/asm/vfp.h @@ -87,6 +87,12 @@ #define MVFR0_DP_BIT (8) #define MVFR0_DP_MASK (0xf << MVFR0_DP_BIT) +/* MVFR1 bits */ +#define MVFR1_ASIMDHP_BIT (20) +#define MVFR1_ASIMDHP_MASK (0xf << MVFR1_ASIMDHP_BIT) +#define MVFR1_FPHP_BIT (24) +#define MVFR1_FPHP_MASK (0xf << MVFR1_FPHP_BIT) + /* Bit patterns for decoding the packaged operation descriptors */ #define VFPOPDESC_LENGTH_BIT (9) #define VFPOPDESC_LENGTH_MASK (0x07 << VFPOPDESC_LENGTH_BIT) diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 990199d8b7c6..8b6f690f0ac4 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -28,6 +28,8 @@ #define HWCAP_IDIV (HWCAP_IDIVA | HWCAP_IDIVT) #define HWCAP_LPAE (1 << 20) #define HWCAP_EVTSTRM (1 << 21) +#define HWCAP_FPHP (1 << 22) +#define HWCAP_ASIMDHP (1 << 23) /* * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2 diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1e8a50a97edf..870e5d5c5b5b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1249,6 +1249,8 @@ static const char *hwcap_str[] = { "vfpd32", "lpae", "evtstrm", + "fphp", + "asimdhp", NULL }; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 2cb355c1b5b7..55dcd96c7e3b 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -831,6 +831,10 @@ static int __init vfp_init(void) if ((fmrx(MVFR1) & 0xf0000000) == 0x10000000) elf_hwcap |= HWCAP_VFPv4; + if (((fmrx(MVFR1) & MVFR1_ASIMDHP_MASK) >> MVFR1_ASIMDHP_BIT) == 0x2) + elf_hwcap |= HWCAP_ASIMDHP; + if (((fmrx(MVFR1) & MVFR1_FPHP_MASK) >> MVFR1_FPHP_BIT) == 0x3) + elf_hwcap |= HWCAP_FPHP; } /* Extract the architecture version on pre-cpuid scheme */ } else { -- cgit v1.2.3 From 62ea0d873af3ef0a7e8387b67241ad43e3d377e1 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 06:32:56 +0100 Subject: ARM: 9269/1: vfp: Add hwcap for FEAT_DotProd Advanced Dot product is a feature present in AArch32 state for Armv8 and is represented by ISAR6 identification register. This feature denotes the presence of UDOT and SDOT instructions and hence adding a hwcap will enable the userspace to check it before trying to use those instructions. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/uapi/asm/hwcap.h | 1 + arch/arm/kernel/setup.c | 1 + arch/arm/vfp/vfpmodule.c | 10 ++++++++++ 3 files changed, 12 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 8b6f690f0ac4..64f3608e6f1b 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -30,6 +30,7 @@ #define HWCAP_EVTSTRM (1 << 21) #define HWCAP_FPHP (1 << 22) #define HWCAP_ASIMDHP (1 << 23) +#define HWCAP_ASIMDDP (1 << 24) /* * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2 diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 870e5d5c5b5b..9841b897d476 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1251,6 +1251,7 @@ static const char *hwcap_str[] = { "evtstrm", "fphp", "asimdhp", + "asimddp", NULL }; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 55dcd96c7e3b..70f1e0f4eece 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -774,6 +774,7 @@ static int __init vfp_init(void) { unsigned int vfpsid; unsigned int cpu_arch = cpu_architecture(); + unsigned int isar6; /* * Enable the access to the VFP on all online CPUs so the @@ -836,6 +837,15 @@ static int __init vfp_init(void) if (((fmrx(MVFR1) & MVFR1_FPHP_MASK) >> MVFR1_FPHP_BIT) == 0x3) elf_hwcap |= HWCAP_FPHP; } + + /* + * Check for the presence of Advanced SIMD Dot Product + * instructions. + */ + isar6 = read_cpuid_ext(CPUID_EXT_ISAR6); + if (cpuid_feature_extract_field(isar6, 4) == 0x1) + elf_hwcap |= HWCAP_ASIMDDP; + /* Extract the architecture version on pre-cpuid scheme */ } else { if (vfpsid & FPSID_NODOUBLE) { -- cgit v1.2.3 From ce4835497c20991574fde492ab37ec666563d3e4 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 06:37:21 +0100 Subject: ARM: 9270/1: vfp: Add hwcap for FEAT_FHM Floating-point half-precision multiplication (FHM) is a feature present in AArch32 state for Armv8 and is represented by ISAR6.FHM identification register. This feature denotes the presence of VFMAL and VMFSL instructions and hence adding a hwcap will enable the userspace to check it before trying to use those instructions. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/uapi/asm/hwcap.h | 1 + arch/arm/kernel/setup.c | 1 + arch/arm/vfp/vfpmodule.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 64f3608e6f1b..3d168d4f2a51 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -31,6 +31,7 @@ #define HWCAP_FPHP (1 << 22) #define HWCAP_ASIMDHP (1 << 23) #define HWCAP_ASIMDDP (1 << 24) +#define HWCAP_ASIMDFHM (1 << 25) /* * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2 diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 9841b897d476..d88c1326f529 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1252,6 +1252,7 @@ static const char *hwcap_str[] = { "fphp", "asimdhp", "asimddp", + "asimdfhm", NULL }; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 70f1e0f4eece..404c4f901132 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -845,6 +845,12 @@ static int __init vfp_init(void) isar6 = read_cpuid_ext(CPUID_EXT_ISAR6); if (cpuid_feature_extract_field(isar6, 4) == 0x1) elf_hwcap |= HWCAP_ASIMDDP; + /* + * Check for the presence of Advanced SIMD Floating point + * half-precision multiplication instructions. + */ + if (cpuid_feature_extract_field(isar6, 8) == 0x1) + elf_hwcap |= HWCAP_ASIMDFHM; /* Extract the architecture version on pre-cpuid scheme */ } else { -- cgit v1.2.3 From 23b6d4ad6e7a3028dd88aff7e2b0e5a81da8565e Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 06:42:07 +0100 Subject: ARM: 9271/1: vfp: Add hwcap for FEAT_AA32BF16 Advanced SIMD BFloat16 (FEAT_AA32BF16) is a feature present in AArch32 state for Armv8 and is represented by ISAR6.BF16 identification register. This feature denotes the presence of VCVT, VCVTB, VCVTT, VDOT, VFMAB, VFMAT and VMMLA instructions and hence adding a hwcap will enable the userspace to check it before trying to use those instructions. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/uapi/asm/hwcap.h | 1 + arch/arm/kernel/setup.c | 1 + arch/arm/vfp/vfpmodule.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 3d168d4f2a51..14e260e2d6d0 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -32,6 +32,7 @@ #define HWCAP_ASIMDHP (1 << 23) #define HWCAP_ASIMDDP (1 << 24) #define HWCAP_ASIMDFHM (1 << 25) +#define HWCAP_ASIMDBF16 (1 << 26) /* * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2 diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d88c1326f529..070067cb6c77 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1253,6 +1253,7 @@ static const char *hwcap_str[] = { "asimdhp", "asimddp", "asimdfhm", + "asimdbf16", NULL }; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 404c4f901132..ff574cb8a0b9 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -851,6 +851,12 @@ static int __init vfp_init(void) */ if (cpuid_feature_extract_field(isar6, 8) == 0x1) elf_hwcap |= HWCAP_ASIMDFHM; + /* + * Check for the presence of Advanced SIMD Bfloat16 + * floating point instructions. + */ + if (cpuid_feature_extract_field(isar6, 20) == 0x1) + elf_hwcap |= HWCAP_ASIMDBF16; /* Extract the architecture version on pre-cpuid scheme */ } else { -- cgit v1.2.3 From 956ca3a4eb81c1b8cc3226af3083847544dcb098 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 06:58:31 +0100 Subject: ARM: 9272/1: vfp: Add hwcap for FEAT_AA32I8MM Int8 matrix multiplication (FEAT_AA32I8MM) is a feature present in AArch32 state for Armv8 and is represented by ISAR6.I8MM identification register. This feature denotes the presence of VSMMLA, VSUDOT, VUMMLA, VUSMMLA and VUSDOT instructions and hence adding a hwcap will enable the userspace to check it before trying to use those instructions. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/uapi/asm/hwcap.h | 1 + arch/arm/kernel/setup.c | 1 + arch/arm/vfp/vfpmodule.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 14e260e2d6d0..46833c668ec2 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -33,6 +33,7 @@ #define HWCAP_ASIMDDP (1 << 24) #define HWCAP_ASIMDFHM (1 << 25) #define HWCAP_ASIMDBF16 (1 << 26) +#define HWCAP_I8MM (1 << 27) /* * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2 diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 070067cb6c77..5807fa021785 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1254,6 +1254,7 @@ static const char *hwcap_str[] = { "asimddp", "asimdfhm", "asimdbf16", + "i8mm", NULL }; diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index ff574cb8a0b9..281110423871 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -857,6 +857,12 @@ static int __init vfp_init(void) */ if (cpuid_feature_extract_field(isar6, 20) == 0x1) elf_hwcap |= HWCAP_ASIMDBF16; + /* + * Check for the presence of Advanced SIMD and floating point + * Int8 matrix multiplication instructions instructions. + */ + if (cpuid_feature_extract_field(isar6, 24) == 0x1) + elf_hwcap |= HWCAP_I8MM; /* Extract the architecture version on pre-cpuid scheme */ } else { -- cgit v1.2.3 From 3bda6d88489769fba5672dc66debdc1f5516c5fe Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 07:05:19 +0100 Subject: ARM: 9273/1: Add hwcap for Speculation Barrier(SB) Speculation Barrier(FEAT_SB) is a feature present in AArch32 state for Armv8 and is represented by ISAR6.SB identification register. This feature denotes the presence of SB instruction and hence adding a hwcap will enable the userspace to check it before trying to use this instruction. This commit adds the ID feature bit detection, and uses elf_hwcap2 accordingly. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/uapi/asm/hwcap.h | 1 + arch/arm/kernel/setup.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index 46833c668ec2..bc9e7d318e25 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -43,5 +43,6 @@ #define HWCAP2_SHA1 (1 << 2) #define HWCAP2_SHA2 (1 << 3) #define HWCAP2_CRC32 (1 << 4) +#define HWCAP2_SB (1 << 5) #endif /* _UAPI__ASMARM_HWCAP_H */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 5807fa021785..979612add87a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -450,6 +450,7 @@ static void __init cpuid_init_hwcaps(void) { int block; u32 isar5; + u32 isar6; if (cpu_architecture() < CPU_ARCH_ARMv7) return; @@ -485,6 +486,12 @@ static void __init cpuid_init_hwcaps(void) block = cpuid_feature_extract_field(isar5, 16); if (block >= 1) elf_hwcap2 |= HWCAP2_CRC32; + + /* Check for Speculation barrier instruction */ + isar6 = read_cpuid_ext(CPUID_EXT_ISAR6); + block = cpuid_feature_extract_field(isar6, 12); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SB; } static void __init elf_hwcap_fixup(void) @@ -1264,6 +1271,7 @@ static const char *hwcap2_str[] = { "sha1", "sha2", "crc32", + "sb", NULL }; -- cgit v1.2.3 From fea53546be574f2e357fa53f4a582788b20f283c Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 17 Nov 2022 07:10:35 +0100 Subject: ARM: 9274/1: Add hwcap for Speculative Store Bypassing Safe Speculative Store Bypassing Safe(FEAT_SSBS) is a feature present in AArch32 state for Armv8 and is represented by ID_PFR2_EL1.SSBS identification register. This feature denotes the presence of PSTATE.ssbs bit and hence adding a hwcap will enable the userspace to check it before trying to set/unset this PSTATE. This commit adds the ID feature bit detection, and uses elf_hwcap2 accordingly. Reviewed-by: Linus Walleij Signed-off-by: Amit Daniel Kachhap Signed-off-by: Russell King (Oracle) --- arch/arm/include/uapi/asm/hwcap.h | 1 + arch/arm/kernel/setup.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/uapi/asm/hwcap.h b/arch/arm/include/uapi/asm/hwcap.h index bc9e7d318e25..6b2023e39b6f 100644 --- a/arch/arm/include/uapi/asm/hwcap.h +++ b/arch/arm/include/uapi/asm/hwcap.h @@ -44,5 +44,6 @@ #define HWCAP2_SHA2 (1 << 3) #define HWCAP2_CRC32 (1 << 4) #define HWCAP2_SB (1 << 5) +#define HWCAP2_SSBS (1 << 6) #endif /* _UAPI__ASMARM_HWCAP_H */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 979612add87a..4ec3ec32d830 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -451,6 +451,7 @@ static void __init cpuid_init_hwcaps(void) int block; u32 isar5; u32 isar6; + u32 pfr2; if (cpu_architecture() < CPU_ARCH_ARMv7) return; @@ -492,6 +493,12 @@ static void __init cpuid_init_hwcaps(void) block = cpuid_feature_extract_field(isar6, 12); if (block >= 1) elf_hwcap2 |= HWCAP2_SB; + + /* Check for Speculative Store Bypassing control */ + pfr2 = read_cpuid_ext(CPUID_EXT_PFR2); + block = cpuid_feature_extract_field(pfr2, 4); + if (block >= 1) + elf_hwcap2 |= HWCAP2_SSBS; } static void __init elf_hwcap_fixup(void) @@ -1272,6 +1279,7 @@ static const char *hwcap2_str[] = { "sha2", "crc32", "sb", + "ssbs", NULL }; -- cgit v1.2.3 From 59e2cf8d21e05391c42628eb9fb5bb40f9d9698f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 21 Nov 2022 20:21:21 +0100 Subject: ARM: 9275/1: Drop '-mthumb' from AFLAGS_ISA When building with CONFIG_THUMB2_KERNEL=y + a version of clang from Debian using CROSS_COMPILE=arm-linux-gnueabihf-, the following warning occurs frequently: :383:9: warning: '__thumb2__' macro redefined [-Wmacro-redefined] #define __thumb2__ 2 ^ :353:9: note: previous definition is here #define __thumb2__ 1 ^ 1 warning generated. Debian carries a downstream patch that changes the default CPU of the arm-linux-gnueabihf target from 'arm1176jzf-s' (v6) to 'cortex-a7' (v7). As a result, '-mthumb' defines both '__thumb__' and '__thumb2__'. The define of '__thumb2__' via the command line was purposefully added to catch a situation like this. In a similar vein as commit 26b12e084bce ("ARM: 9264/1: only use -mtp=cp15 for the compiler"), do not add '-mthumb' to AFLAGS_ISA, as it is already passed to the assembler via '-Wa,-mthumb' and '__thumb2__' is already defined for preprocessing. Link: https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/raw/622dbcbd40b316ed3905a2d25d9623544a06e6b1/debian/patches/930008-arm.diff Fixes: 1d2e9b67b001 ("ARM: 9265/1: pass -march= only to compiler") Reported-by: "kernelci.org bot" Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Reviewed-by: Ard Biesheuvel Signed-off-by: Nathan Chancellor Signed-off-by: Russell King (Oracle) --- arch/arm/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 357f0d9b8607..d1ebb746ff40 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -131,8 +131,9 @@ endif AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) ifeq ($(CONFIG_THUMB2_KERNEL),y) -CFLAGS_ISA :=-mthumb -Wa,-mimplicit-it=always $(AFLAGS_NOWARN) +CFLAGS_ISA :=-Wa,-mimplicit-it=always $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb -D__thumb2__=2 +CFLAGS_ISA +=-mthumb else CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -- cgit v1.2.3 From 21d0798acf85e9ff8a6c7d8918df7d98e145d688 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 28 Nov 2022 12:42:42 +0100 Subject: ARM: 9276/1: Refactor dump_instr() 1. Rename local variable 'val16' to 'tmp'. So that the processing statements of thumb and arm can be aligned. 2. Fix two sparse check warnings: (add __user for type conversion) warning: incorrect type in initializer (different address spaces) expected unsigned short [noderef] __user *register __p got unsigned short [usertype] * 3. Prepare for the next patch to avoid repeated judgment. Before: if (!user_mode(regs)) { if (thumb) else } else { if (thumb) else } After: if (thumb) { if (user_mode(regs)) else } else { if (user_mode(regs)) else } Signed-off-by: Zhen Lei Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/traps.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 20b2db6dcd1c..a92e07637395 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -178,19 +178,20 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; - if (!user_mode(regs)) { - if (thumb) { - u16 val16; - bad = get_kernel_nofault(val16, &((u16 *)addr)[i]); - val = val16; - } else { - bad = get_kernel_nofault(val, &((u32 *)addr)[i]); - } + if (thumb) { + u16 tmp; + + if (user_mode(regs)) + bad = get_user(tmp, &((u16 __user *)addr)[i]); + else + bad = get_kernel_nofault(tmp, &((u16 *)addr)[i]); + + val = tmp; } else { - if (thumb) - bad = get_user(val, &((u16 *)addr)[i]); + if (user_mode(regs)) + bad = get_user(val, &((u32 __user *)addr)[i]); else - bad = get_user(val, &((u32 *)addr)[i]); + bad = get_kernel_nofault(val, &((u32 *)addr)[i]); } if (!bad) -- cgit v1.2.3 From ba290d4f1f7733732896567d2a1874f01e20fff4 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Mon, 28 Nov 2022 12:42:43 +0100 Subject: ARM: 9277/1: Make the dumped instructions are consistent with the disassembled ones In ARM, the mapping of instruction memory is always little-endian, except some BE-32 supported ARM architectures. Such as ARMv7-R, its instruction endianness may be BE-32. Of course, its data endianness will also be BE-32 mode. Due to two negatives make a positive, the instruction stored in the register after reading is in little-endian format. But for the case of BE-8, the instruction endianness is LE, the instruction stored in the register after reading is in big-endian format, which is inconsistent with the disassembled one. For example: The content of disassembly: c0429ee8: e3500000 cmp r0, #0 c0429eec: 159f2044 ldrne r2, [pc, #68] c0429ef0: 108f2002 addne r2, pc, r2 c0429ef4: 1882000a stmne r2, {r1, r3} c0429ef8: e7f000f0 udf #0 The output of undefined instruction exception: Internal error: Oops - undefined instruction: 0 [#1] SMP ARM ... ... Code: 000050e3 44209f15 02208f10 0a008218 (f000f0e7) This inconveniences the checking of instructions. What's worse is that, for somebody who don't know about this, might think the instructions are all broken. So, when CONFIG_CPU_ENDIAN_BE8=y, let's convert the instructions to little-endian format before they are printed. The conversion result is as follows: Code: e3500000 159f2044 108f2002 1882000a (e7f000f0) Signed-off-by: Zhen Lei Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/traps.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index a92e07637395..40c7c807d67f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -186,12 +186,14 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) else bad = get_kernel_nofault(tmp, &((u16 *)addr)[i]); - val = tmp; + val = __mem_to_opcode_thumb16(tmp); } else { if (user_mode(regs)) bad = get_user(val, &((u32 __user *)addr)[i]); else bad = get_kernel_nofault(val, &((u32 *)addr)[i]); + + val = __mem_to_opcode_arm(val); } if (!bad) -- cgit v1.2.3 From aaa4dd1b47f5ff5ef477fec5dcc6c397b457f1c2 Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sun, 4 Dec 2022 04:46:40 +0100 Subject: ARM: 9279/1: support function error injection This enables HAVE_FUNCTION_ERROR_INJECTION by adding necessary regs_set_return_value() and override_function_with_return(). Simply tested according to Documentation/fault-injection/fault-injection.rst. Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 1 + arch/arm/include/asm/ptrace.h | 4 ++++ arch/arm/lib/Makefile | 2 ++ arch/arm/lib/error-inject.c | 10 ++++++++++ 4 files changed, 17 insertions(+) create mode 100644 arch/arm/lib/error-inject.c (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b3427536731d..981bbfa62b7d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -97,6 +97,7 @@ config ARM select HAVE_EXIT_THREAD select HAVE_FAST_GUP if ARM_LPAE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 1408a6a15d0e..483b8ddfcb82 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -163,6 +163,10 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->ARM_r0 = rc; +} /* * Update ITSTATE after normal execution of an IT block instruction. diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 42fb75c06647..650404be6768 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -44,3 +44,5 @@ ifeq ($(CONFIG_KERNEL_MODE_NEON),y) CFLAGS_xor-neon.o += $(NEON_FLAGS) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o endif + +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/arm/lib/error-inject.c b/arch/arm/lib/error-inject.c new file mode 100644 index 000000000000..5a5b405792ba --- /dev/null +++ b/arch/arm/lib/error-inject.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +void override_function_with_return(struct pt_regs *regs) +{ + instruction_pointer_set(regs, regs->ARM_lr); +} +NOKPROBE_SYMBOL(override_function_with_return); -- cgit v1.2.3