diff options
Diffstat (limited to 'arch/riscv/kernel')
-rw-r--r-- | arch/riscv/kernel/Makefile | 3 | ||||
-rw-r--r-- | arch/riscv/kernel/cpu-hotplug.c | 2 | ||||
-rw-r--r-- | arch/riscv/kernel/cpu.c | 70 | ||||
-rw-r--r-- | arch/riscv/kernel/cpu_ops_sbi.c | 2 | ||||
-rw-r--r-- | arch/riscv/kernel/cpufeature.c | 130 | ||||
-rw-r--r-- | arch/riscv/kernel/entry.S | 14 | ||||
-rw-r--r-- | arch/riscv/kernel/head.S | 11 | ||||
-rw-r--r-- | arch/riscv/kernel/module.c | 21 | ||||
-rw-r--r-- | arch/riscv/kernel/perf_callchain.c | 4 | ||||
-rw-r--r-- | arch/riscv/kernel/perf_event.c | 485 | ||||
-rw-r--r-- | arch/riscv/kernel/ptrace.c | 5 | ||||
-rw-r--r-- | arch/riscv/kernel/sbi.c | 72 | ||||
-rw-r--r-- | arch/riscv/kernel/setup.c | 3 | ||||
-rw-r--r-- | arch/riscv/kernel/signal.c | 6 | ||||
-rw-r--r-- | arch/riscv/kernel/stacktrace.c | 9 | ||||
-rw-r--r-- | arch/riscv/kernel/trace_irq.c | 27 | ||||
-rw-r--r-- | arch/riscv/kernel/trace_irq.h | 11 |
17 files changed, 309 insertions, 566 deletions
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 612556faa527..e0133d113216 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -51,7 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o +obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o + obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index be7f05b542bb..f7a832e3a1d1 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -12,6 +12,7 @@ #include <linux/sched/hotplug.h> #include <asm/irq.h> #include <asm/cpu_ops.h> +#include <asm/numa.h> #include <asm/sbi.h> bool cpu_has_hotplug(unsigned int cpu) @@ -40,6 +41,7 @@ int __cpu_disable(void) return ret; remove_cpu_topology(cpu); + numa_remove_cpu(cpu); set_cpu_online(cpu, false); irq_migrate_all_off_this_cpu(); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index ad0a7e9f828b..d2a936195295 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -6,6 +6,7 @@ #include <linux/init.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <asm/hwcap.h> #include <asm/smp.h> #include <asm/pgtable.h> @@ -63,12 +64,73 @@ int riscv_of_parent_hartid(struct device_node *node) } #ifdef CONFIG_PROC_FS +#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ + { \ + .uprop = #UPROP, \ + .isa_ext_id = EXTID, \ + } +/** + * Here are the ordering rules of extension naming defined by RISC-V + * specification : + * 1. All extensions should be separated from other multi-letter extensions + * from other multi-letter extensions by an underscore. + * 2. The first letter following the 'Z' conventionally indicates the most + * closely related alphabetical extension category, IMAFDQLCBKJTPVH. + * If multiple 'Z' extensions are named, they should be ordered first + * by category, then alphabetically within a category. + * 3. Standard supervisor-level extensions (starts with 'S') should be + * listed after standard unprivileged extensions. If multiple + * supervisor-level extensions are listed, they should be ordered + * alphabetically. + * 4. Non-standard extensions (starts with 'X') must be listed after all + * standard extensions. They must be separated from other multi-letter + * extensions by an underscore. + */ +static struct riscv_isa_ext_data isa_ext_arr[] = { + __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), +}; + +static void print_isa_ext(struct seq_file *f) +{ + struct riscv_isa_ext_data *edata; + int i = 0, arr_sz; + + arr_sz = ARRAY_SIZE(isa_ext_arr) - 1; + + /* No extension support available */ + if (arr_sz <= 0) + return; + + for (i = 0; i <= arr_sz; i++) { + edata = &isa_ext_arr[i]; + if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id)) + continue; + seq_printf(f, "_%s", edata->uprop); + } +} + +/** + * These are the only valid base (single letter) ISA extensions as per the spec. + * It also specifies the canonical order in which it appears in the spec. + * Some of the extension may just be a place holder for now (B, K, P, J). + * This should be updated once corresponding extensions are ratified. + */ +static const char base_riscv_exts[13] = "imafdqcbkjpvh"; static void print_isa(struct seq_file *f, const char *isa) { - /* Print the entire ISA as it is */ + int i; + seq_puts(f, "isa\t\t: "); - seq_write(f, isa, strlen(isa)); + /* Print the rv[64/32] part */ + seq_write(f, isa, 4); + for (i = 0; i < sizeof(base_riscv_exts); i++) { + if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a')) + /* Print only enabled the base ISA extensions */ + seq_write(f, &base_riscv_exts[i], 1); + } + print_isa_ext(f); seq_puts(f, "\n"); } @@ -79,7 +141,9 @@ static void print_mmu(struct seq_file *f) #if defined(CONFIG_32BIT) strncpy(sv_type, "sv32", 5); #elif defined(CONFIG_64BIT) - if (pgtable_l4_enabled) + if (pgtable_l5_enabled) + strncpy(sv_type, "sv57", 5); + else if (pgtable_l4_enabled) strncpy(sv_type, "sv48", 5); else strncpy(sv_type, "sv39", 5); diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index dae29cbfe550..2e16f6732cdf 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -111,7 +111,7 @@ static int sbi_cpu_is_stopped(unsigned int cpuid) rc = sbi_hsm_hart_get_status(hartid); - if (rc == SBI_HSM_HART_STATUS_STOPPED) + if (rc == SBI_HSM_STATE_STOPPED) return 0; return rc; } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index d959d207a40d..1b2d42d7f589 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -7,12 +7,15 @@ */ #include <linux/bitmap.h> +#include <linux/ctype.h> #include <linux/of.h> #include <asm/processor.h> #include <asm/hwcap.h> #include <asm/smp.h> #include <asm/switch_to.h> +#define NUM_ALPHA_EXTS ('z' - 'a' + 1) + unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -63,8 +66,8 @@ void __init riscv_fill_hwcap(void) { struct device_node *node; const char *isa; - char print_str[BITS_PER_LONG + 1]; - size_t i, j, isa_len; + char print_str[NUM_ALPHA_EXTS + 1]; + int i, j; static unsigned long isa2hwcap[256] = {0}; isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; @@ -80,7 +83,8 @@ void __init riscv_fill_hwcap(void) for_each_of_cpu_node(node) { unsigned long this_hwcap = 0; - unsigned long this_isa = 0; + DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); + const char *temp; if (riscv_of_processor_hartid(node) < 0) continue; @@ -90,23 +94,106 @@ void __init riscv_fill_hwcap(void) continue; } - i = 0; - isa_len = strlen(isa); + temp = isa; #if IS_ENABLED(CONFIG_32BIT) if (!strncmp(isa, "rv32", 4)) - i += 4; + isa += 4; #elif IS_ENABLED(CONFIG_64BIT) if (!strncmp(isa, "rv64", 4)) - i += 4; + isa += 4; #endif - for (; i < isa_len; ++i) { - this_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; - /* - * TODO: X, Y and Z extension parsing for Host ISA - * bitmap will be added in-future. - */ - if ('a' <= isa[i] && isa[i] < 'x') - this_isa |= (1UL << (isa[i] - 'a')); + /* The riscv,isa DT property must start with rv64 or rv32 */ + if (temp == isa) + continue; + bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); + for (; *isa; ++isa) { + const char *ext = isa++; + const char *ext_end = isa; + bool ext_long = false, ext_err = false; + + switch (*ext) { + case 's': + /** + * Workaround for invalid single-letter 's' & 'u'(QEMU). + * No need to set the bit in riscv_isa as 's' & 'u' are + * not valid ISA extensions. It works until multi-letter + * extension starting with "Su" appears. + */ + if (ext[-1] != '_' && ext[1] == 'u') { + ++isa; + ext_err = true; + break; + } + fallthrough; + case 'x': + case 'z': + ext_long = true; + /* Multi-letter extension must be delimited */ + for (; *isa && *isa != '_'; ++isa) + if (unlikely(!islower(*isa) + && !isdigit(*isa))) + ext_err = true; + /* Parse backwards */ + ext_end = isa; + if (unlikely(ext_err)) + break; + if (!isdigit(ext_end[-1])) + break; + /* Skip the minor version */ + while (isdigit(*--ext_end)) + ; + if (ext_end[0] != 'p' + || !isdigit(ext_end[-1])) { + /* Advance it to offset the pre-decrement */ + ++ext_end; + break; + } + /* Skip the major version */ + while (isdigit(*--ext_end)) + ; + ++ext_end; + break; + default: + if (unlikely(!islower(*ext))) { + ext_err = true; + break; + } + /* Find next extension */ + if (!isdigit(*isa)) + break; + /* Skip the minor version */ + while (isdigit(*++isa)) + ; + if (*isa != 'p') + break; + if (!isdigit(*++isa)) { + --isa; + break; + } + /* Skip the major version */ + while (isdigit(*++isa)) + ; + break; + } + if (*isa != '_') + --isa; + +#define SET_ISA_EXT_MAP(name, bit) \ + do { \ + if ((ext_end - ext == sizeof(name) - 1) && \ + !memcmp(ext, name, sizeof(name) - 1)) \ + set_bit(bit, this_isa); \ + } while (false) \ + + if (unlikely(ext_err)) + continue; + if (!ext_long) { + this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; + set_bit(*ext - 'a', this_isa); + } else { + SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); + } +#undef SET_ISA_EXT_MAP } /* @@ -119,10 +206,11 @@ void __init riscv_fill_hwcap(void) else elf_hwcap = this_hwcap; - if (riscv_isa[0]) - riscv_isa[0] &= this_isa; + if (bitmap_weight(riscv_isa, RISCV_ISA_EXT_MAX)) + bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); else - riscv_isa[0] = this_isa; + bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + } /* We don't support systems with F but without D, so mask those out @@ -133,13 +221,13 @@ void __init riscv_fill_hwcap(void) } memset(print_str, 0, sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) print_str[j++] = (char)('a' + i); - pr_info("riscv: ISA extensions %s\n", print_str); + pr_info("riscv: base ISA extensions %s\n", print_str); memset(print_str, 0, sizeof(print_str)); - for (i = 0, j = 0; i < BITS_PER_LONG; i++) + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (elf_hwcap & BIT_MASK(i)) print_str[j++] = (char)('a' + i); pr_info("riscv: ELF capabilities %s\n", print_str); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index ed29e9c8f660..c8b9ce274b9a 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -108,7 +108,7 @@ _save_context: .option pop #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_CONTEXT_TRACKING @@ -143,7 +143,7 @@ skip_context_tracking: li t0, EXC_BREAKPOINT beq s4, t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on + call __trace_hardirqs_on #endif csrs CSR_STATUS, SR_IE @@ -225,6 +225,10 @@ ret_from_syscall: * (If it was configured with SECCOMP_RET_ERRNO/TRACE) */ ret_from_syscall_rejected: +#ifdef CONFIG_DEBUG_RSEQ + move a0, sp + call rseq_syscall +#endif /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK @@ -234,7 +238,7 @@ ret_from_exception: REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ @@ -270,10 +274,10 @@ restore_all: REG_L s1, PT_STATUS(sp) andi t0, s1, SR_PIE beqz t0, 1f - call trace_hardirqs_on + call __trace_hardirqs_on j 2f 1: - call trace_hardirqs_off + call __trace_hardirqs_off 2: #endif REG_L a0, PT_STATUS(sp) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 2363b43312fc..ec07f991866a 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -22,14 +22,13 @@ add \reg, \reg, t0 .endm .macro XIP_FIXUP_FLASH_OFFSET reg - la t1, __data_loc - li t0, XIP_OFFSET_MASK - and t1, t1, t0 - li t1, XIP_OFFSET - sub t0, t0, t1 - sub \reg, \reg, t0 + la t0, __data_loc + REG_L t1, _xip_phys_offset + sub \reg, \reg, t1 + add \reg, \reg, t0 .endm _xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET +_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET #else .macro XIP_FIXUP_OFFSET reg .endm diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 68a9e3d1fe16..4a48287513c3 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -13,6 +13,19 @@ #include <linux/pgtable.h> #include <asm/sections.h> +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { @@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 1fc075b8f764..55faa4991b87 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -15,8 +15,8 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, { struct stackframe buftail; unsigned long ra = 0; - unsigned long *user_frame_tail = - (unsigned long *)(fp - sizeof(struct stackframe)); + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); /* Check accessibility of one struct frame_tail beyond */ if (!access_ok(user_frame_tail, sizeof(buftail))) diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c deleted file mode 100644 index c835f0362d94..000000000000 --- a/arch/riscv/kernel/perf_event.c +++ /dev/null @@ -1,485 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> - * Copyright (C) 2009 Google, Inc., Stephane Eranian - * Copyright 2014 Tilera Corporation. All Rights Reserved. - * Copyright (C) 2018 Andes Technology Corporation - * - * Perf_events support for RISC-V platforms. - * - * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough - * functionality for perf event to fully work, this file provides - * the very basic framework only. - * - * For platform portings, please check Documentations/riscv/pmu.txt. - * - * The Copyright line includes x86 and tile ones. - */ - -#include <linux/kprobes.h> -#include <linux/kernel.h> -#include <linux/kdebug.h> -#include <linux/mutex.h> -#include <linux/bitmap.h> -#include <linux/irq.h> -#include <linux/perf_event.h> -#include <linux/atomic.h> -#include <linux/of.h> -#include <asm/perf_event.h> - -static const struct riscv_pmu *riscv_pmu __read_mostly; -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); - -/* - * Hardware & cache maps and their methods - */ - -static const int riscv_hw_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE, - [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET, - [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP, - [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP, -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x -static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX] -[PERF_COUNT_HW_CACHE_OP_MAX] -[PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP, - [C(RESULT_MISS)] = RISCV_OP_UNSUPP, - }, - }, -}; - -static int riscv_map_hw_event(u64 config) -{ - if (config >= riscv_pmu->max_events) - return -EINVAL; - - return riscv_pmu->hw_events[config]; -} - -static int riscv_map_cache_decode(u64 config, unsigned int *type, - unsigned int *op, unsigned int *result) -{ - return -ENOENT; -} - -static int riscv_map_cache_event(u64 config) -{ - unsigned int type, op, result; - int err = -ENOENT; - int code; - - err = riscv_map_cache_decode(config, &type, &op, &result); - if (!riscv_pmu->cache_events || err) - return err; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - code = (*riscv_pmu->cache_events)[type][op][result]; - if (code == RISCV_OP_UNSUPP) - return -EINVAL; - - return code; -} - -/* - * Low-level functions: reading/writing counters - */ - -static inline u64 read_counter(int idx) -{ - u64 val = 0; - - switch (idx) { - case RISCV_PMU_CYCLE: - val = csr_read(CSR_CYCLE); - break; - case RISCV_PMU_INSTRET: - val = csr_read(CSR_INSTRET); - break; - default: - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS); - return -EINVAL; - } - - return val; -} - -static inline void write_counter(int idx, u64 value) -{ - /* currently not supported */ - WARN_ON_ONCE(1); -} - -/* - * pmu->read: read and update the counter - * - * Other architectures' implementation often have a xxx_perf_event_update - * routine, which can return counter values when called in the IRQ, but - * return void when being called by the pmu->read method. - */ -static void riscv_pmu_read(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 prev_raw_count, new_raw_count; - u64 oldval; - int idx = hwc->idx; - u64 delta; - - do { - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = read_counter(idx); - - oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count); - } while (oldval != prev_raw_count); - - /* - * delta is the value to update the counter we maintain in the kernel. - */ - delta = (new_raw_count - prev_raw_count) & - ((1ULL << riscv_pmu->counter_width) - 1); - local64_add(delta, &event->count); - /* - * Something like local64_sub(delta, &hwc->period_left) here is - * needed if there is an interrupt for perf. - */ -} - -/* - * State transition functions: - * - * stop()/start() & add()/del() - */ - -/* - * pmu->stop: stop the counter - */ -static void riscv_pmu_stop(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - riscv_pmu->pmu->read(event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -/* - * pmu->start: start the event. - */ -static void riscv_pmu_start(struct perf_event *event, int flags) -{ - struct hw_perf_event *hwc = &event->hw; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - if (flags & PERF_EF_RELOAD) { - WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - - /* - * Set the counter to the period to the next interrupt here, - * if you have any. - */ - } - - hwc->state = 0; - perf_event_update_userpage(event); - - /* - * Since we cannot write to counters, this serves as an initialization - * to the delta-mechanism in pmu->read(); otherwise, the delta would be - * wrong when pmu->read is called for the first time. - */ - local64_set(&hwc->prev_count, read_counter(hwc->idx)); -} - -/* - * pmu->add: add the event to PMU. - */ -static int riscv_pmu_add(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - if (cpuc->n_events == riscv_pmu->num_counters) - return -ENOSPC; - - /* - * We don't have general conunters, so no binding-event-to-counter - * process here. - * - * Indexing using hwc->config generally not works, since config may - * contain extra information, but here the only info we have in - * hwc->config is the event index. - */ - hwc->idx = hwc->config; - cpuc->events[hwc->idx] = event; - cpuc->n_events++; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - - if (flags & PERF_EF_START) - riscv_pmu->pmu->start(event, PERF_EF_RELOAD); - - return 0; -} - -/* - * pmu->del: delete the event from PMU. - */ -static void riscv_pmu_del(struct perf_event *event, int flags) -{ - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - cpuc->events[hwc->idx] = NULL; - cpuc->n_events--; - riscv_pmu->pmu->stop(event, PERF_EF_UPDATE); - perf_event_update_userpage(event); -} - -/* - * Interrupt: a skeletion for reference. - */ - -static DEFINE_MUTEX(pmc_reserve_mutex); - -static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) -{ - return IRQ_NONE; -} - -static int reserve_pmc_hardware(void) -{ - int err = 0; - - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) { - err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq, - IRQF_PERCPU, "riscv-base-perf", NULL); - } - mutex_unlock(&pmc_reserve_mutex); - - return err; -} - -static void release_pmc_hardware(void) -{ - mutex_lock(&pmc_reserve_mutex); - if (riscv_pmu->irq >= 0) - free_irq(riscv_pmu->irq, NULL); - mutex_unlock(&pmc_reserve_mutex); -} - -/* - * Event Initialization/Finalization - */ - -static atomic_t riscv_active_events = ATOMIC_INIT(0); - -static void riscv_event_destroy(struct perf_event *event) -{ - if (atomic_dec_return(&riscv_active_events) == 0) - release_pmc_hardware(); -} - -static int riscv_event_init(struct perf_event *event) -{ - struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - int err; - int code; - - if (atomic_inc_return(&riscv_active_events) == 1) { - err = reserve_pmc_hardware(); - - if (err) { - pr_warn("PMC hardware not available\n"); - atomic_dec(&riscv_active_events); - return -EBUSY; - } - } - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - code = riscv_pmu->map_hw_event(attr->config); - break; - case PERF_TYPE_HW_CACHE: - code = riscv_pmu->map_cache_event(attr->config); - break; - case PERF_TYPE_RAW: - return -EOPNOTSUPP; - default: - return -ENOENT; - } - - event->destroy = riscv_event_destroy; - if (code < 0) { - event->destroy(event); - return code; - } - - /* - * idx is set to -1 because the index of a general event should not be - * decided until binding to some counter in pmu->add(). - * - * But since we don't have such support, later in pmu->add(), we just - * use hwc->config as the index instead. - */ - hwc->config = code; - hwc->idx = -1; - - return 0; -} - -/* - * Initialization - */ - -static struct pmu min_pmu = { - .name = "riscv-base", - .event_init = riscv_event_init, - .add = riscv_pmu_add, - .del = riscv_pmu_del, - .start = riscv_pmu_start, - .stop = riscv_pmu_stop, - .read = riscv_pmu_read, -}; - -static const struct riscv_pmu riscv_base_pmu = { - .pmu = &min_pmu, - .max_events = ARRAY_SIZE(riscv_hw_event_map), - .map_hw_event = riscv_map_hw_event, - .hw_events = riscv_hw_event_map, - .map_cache_event = riscv_map_cache_event, - .cache_events = &riscv_cache_event_map, - .counter_width = 63, - .num_counters = RISCV_BASE_COUNTERS + 0, - .handle_irq = &riscv_base_pmu_handle_irq, - - /* This means this PMU has no IRQ. */ - .irq = -1, -}; - -static const struct of_device_id riscv_pmu_of_ids[] = { - {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu}, - { /* sentinel value */ } -}; - -static int __init init_hw_perf_events(void) -{ - struct device_node *node = of_find_node_by_type(NULL, "pmu"); - const struct of_device_id *of_id; - - riscv_pmu = &riscv_base_pmu; - - if (node) { - of_id = of_match_node(riscv_pmu_of_ids, node); - - if (of_id) - riscv_pmu = of_id->data; - of_node_put(node); - } - - perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW); - return 0; -} -arch_initcall(init_hw_perf_events); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index a89243730153..793c7da0554b 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -17,7 +17,6 @@ #include <linux/regset.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> -#include <linux/tracehook.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -241,7 +240,7 @@ long arch_ptrace(struct task_struct *child, long request, __visible int do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) return -1; /* @@ -266,7 +265,7 @@ __visible void do_syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f72527fcb347..775d3322b422 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -5,6 +5,7 @@ * Copyright (c) 2020 Western Digital Corporation or its affiliates. */ +#include <linux/bits.h> #include <linux/init.h> #include <linux/pm.h> #include <linux/reboot.h> @@ -85,7 +86,7 @@ static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mas pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); break; } - hmask |= 1 << hartid; + hmask |= BIT(hartid); } return hmask; @@ -160,7 +161,7 @@ static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -176,7 +177,7 @@ static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, int result = 0; unsigned long hart_mask; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); @@ -249,26 +250,37 @@ static void __sbi_set_timer_v02(uint64_t stime_value) static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; struct sbiret ret = {0}; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask, hbase, 0, 0, 0, 0); - if (ret.error) - goto ecall_failed; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + ret = sbi_ecall(SBI_EXT_IPI, + SBI_EXT_IPI_SEND_IPI, hmask, + hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { @@ -344,25 +356,35 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { - unsigned long hartid, cpuid, hmask = 0, hbase = 0; + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; int result; - if (!cpu_mask) + if (!cpu_mask || cpumask_empty(cpu_mask)) cpu_mask = cpu_online_mask; for_each_cpu(cpuid, cpu_mask) { hartid = cpuid_to_hartid_map(cpuid); - if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { - result = __sbi_rfence_v02_call(fid, hmask, hbase, - start, size, arg4, arg5); - if (result) - return result; - hmask = 0; - hbase = 0; + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + result = __sbi_rfence_v02_call(fid, hmask, + hbase, start, size, arg4, arg5); + if (result) + return result; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } } - if (!hmask) + if (!hmask) { hbase = hartid; - hmask |= 1UL << (hartid - hbase); + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); } if (hmask) { diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b42bfdc67482..834eb652a7b9 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -301,9 +301,6 @@ static int __init topology_init(void) { int i, ret; - for_each_online_node(i) - register_one_node(i); - for_each_possible_cpu(i) { struct cpu *cpu = &per_cpu(cpu_devices, i); diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c2d5ecbe5526..9f4e59f80551 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -9,7 +9,7 @@ #include <linux/signal.h> #include <linux/uaccess.h> #include <linux/syscalls.h> -#include <linux/tracehook.h> +#include <linux/resume_user_mode.h> #include <linux/linkage.h> #include <asm/ucontext.h> @@ -258,6 +258,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) } } + rseq_signal_deliver(ksig, regs); + /* Set up the stack frame */ ret = setup_rt_frame(ksig, oldset, regs); @@ -317,5 +319,5 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 201ee206fb57..14d2b53ec322 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -22,15 +22,16 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int level = 0; if (regs) { fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(1); - sp = (unsigned long)__builtin_frame_address(0); - pc = (unsigned long)__builtin_return_address(0); + fp = (unsigned long)__builtin_frame_address(0); + sp = sp_in_global; + pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -42,7 +43,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc)))) break; /* Validate frame pointer */ diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c new file mode 100644 index 000000000000..095ac976d7da --- /dev/null +++ b/arch/riscv/kernel/trace_irq.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ + +#include <linux/irqflags.h> +#include <linux/kprobes.h> +#include "trace_irq.h" + +/* + * trace_hardirqs_on/off require the caller to setup frame pointer properly. + * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. + * Here we add one extra level so they can be safely called by low + * level entry code which $fp is used for other purpose. + */ + +void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_on); + +void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h new file mode 100644 index 000000000000..99fe67377e5e --- /dev/null +++ b/arch/riscv/kernel/trace_irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ +#ifndef __TRACE_IRQ_H +#define __TRACE_IRQ_H + +void __trace_hardirqs_on(void); +void __trace_hardirqs_off(void); + +#endif /* __TRACE_IRQ_H */ |