diff options
Diffstat (limited to 'arch/riscv')
46 files changed, 1409 insertions, 509 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 669a65d8f1d4..2b41f6d8e458 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -14,6 +14,7 @@ config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_STACKWALK select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU @@ -38,6 +39,7 @@ config RISCV select GENERIC_ARCH_TOPOLOGY if SMP select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS + select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO select GENERIC_IOREMAP select GENERIC_IRQ_MULTI_HANDLER @@ -68,6 +70,7 @@ config RISCV select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO if MMU && 64BIT + select HAVE_IRQ_TIME_ACCOUNTING select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -88,6 +91,7 @@ config RISCV select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select UACCESS_MEMCPY if !MMU config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -335,19 +339,6 @@ menu "Kernel features" source "kernel/Kconfig.hz" -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - config RISCV_SBI_V01 bool "SBI v0.1 support" default y @@ -402,6 +393,28 @@ config CMDLINE_FORCE endchoice +config EFI_STUB + bool + +config EFI + bool "UEFI runtime support" + depends on OF + select LIBFDT + select UCS2_STRING + select EFI_PARAMS_FROM_FDT + select EFI_STUB + select EFI_GENERIC_STUB + select EFI_RUNTIME_WRAPPERS + select RISCV_ISA_C + depends on MMU + default y + help + This option provides support for runtime services provided + by UEFI firmware (such as non-volatile variables, realtime + clock, and platform reset). A UEFI stub is also provided to + allow the kernel to be booted as an EFI application. This + is only useful on systems that have UEFI firmware. + endmenu config BUILTIN_DTB @@ -414,3 +427,5 @@ menu "Power management options" source "kernel/power/Kconfig" endmenu + +source "drivers/firmware/Kconfig" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index fb6e37db836d..8c29e553ef7f 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -53,9 +53,6 @@ endif ifeq ($(CONFIG_CMODEL_MEDANY),y) KBUILD_CFLAGS += -mcmodel=medany endif -ifeq ($(CONFIG_MODULE_SECTIONS),y) - KBUILD_LDS_MODULE += $(srctree)/arch/riscv/kernel/module.lds -endif ifeq ($(CONFIG_PERF_EVENTS),y) KBUILD_CFLAGS += -fno-omit-frame-pointer endif @@ -80,6 +77,7 @@ head-y := arch/riscv/kernel/head.o core-y += arch/riscv/ libs-y += arch/riscv/lib/ +libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a PHONY += vdso_install vdso_install: @@ -98,5 +96,11 @@ $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ @$(kecho) ' Kernel: $(boot)/$@ is ready' +Image.%: Image + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + zinstall install: $(Q)$(MAKE) $(build)=$(boot) $@ + +archclean: + $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore index 574c10f8ff68..90e66adb7de5 100644 --- a/arch/riscv/boot/.gitignore +++ b/arch/riscv/boot/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only Image -Image.gz +Image.* loader loader.lds +loader.bin diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index c59fca695f9d..03404c84f971 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -18,7 +18,7 @@ KCOV_INSTRUMENT := n OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image loader +targets := Image Image.* loader loader.o loader.lds loader.bin $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index d58c93efb603..d222d353d86d 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -130,3 +130,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_MEMTEST=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_EFI=y diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 3d9410bb4de0..59dd7be55005 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h generic-y += kvm_para.h diff --git a/arch/riscv/include/asm/cacheinfo.h b/arch/riscv/include/asm/cacheinfo.h index 5d9662e9aba8..d1a365215ec0 100644 --- a/arch/riscv/include/asm/cacheinfo.h +++ b/arch/riscv/include/asm/cacheinfo.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 SiFive + */ #ifndef _ASM_RISCV_CACHEINFO_H #define _ASM_RISCV_CACHEINFO_H @@ -11,5 +14,7 @@ struct riscv_cacheinfo_ops { }; void riscv_set_cacheinfo_ops(struct riscv_cacheinfo_ops *ops); +uintptr_t get_cache_size(u32 level, enum cache_type type); +uintptr_t get_cache_geometry(u32 level, enum cache_type type); #endif /* _ASM_RISCV_CACHEINFO_H */ diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h new file mode 100644 index 000000000000..7542282f1141 --- /dev/null +++ b/arch/riscv/include/asm/efi.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#ifndef _ASM_EFI_H +#define _ASM_EFI_H + +#include <asm/csr.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/ptrace.h> +#include <asm/tlbflush.h> + +#ifdef CONFIG_EFI +extern void efi_init(void); +#else +#define efi_init() +#endif + +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); +int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + +#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_teardown() efi_virtmap_unload() + +#define arch_efi_call_virt(p, f, args...) p->f(args) + +#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) + +/* on RISC-V, the FDT may be located anywhere in system RAM */ +static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) +{ + return ULONG_MAX; +} + +/* Load initrd at enough distance from DRAM start */ +static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) +{ + return image_addr + SZ_256M; +} + +#define alloc_screen_info(x...) (&screen_info) + +static inline void free_screen_info(struct screen_info *si) +{ +} + +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) +{ +} + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + +#endif /* _ASM_EFI_H */ diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index d83a4efd052b..5c725e1df58b 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -11,6 +11,7 @@ #include <uapi/asm/elf.h> #include <asm/auxvec.h> #include <asm/byteorder.h> +#include <asm/cacheinfo.h> /* * These are used to set parameters in the core dumps. @@ -61,6 +62,18 @@ extern unsigned long elf_hwcap; do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (elf_addr_t)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_L1I_CACHESIZE, \ + get_cache_size(1, CACHE_TYPE_INST)); \ + NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, \ + get_cache_geometry(1, CACHE_TYPE_INST)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, \ + get_cache_size(1, CACHE_TYPE_DATA)); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, \ + get_cache_geometry(1, CACHE_TYPE_DATA)); \ + NEW_AUX_ENT(AT_L2_CACHESIZE, \ + get_cache_size(2, CACHE_TYPE_UNIFIED)); \ + NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, \ + get_cache_geometry(2, CACHE_TYPE_UNIFIED)); \ } while (0) #define ARCH_HAS_SETUP_ADDITIONAL_PAGES struct linux_binprm; diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 1ff075a8dfc7..54cbf07fb4e9 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -22,14 +22,24 @@ */ enum fixed_addresses { FIX_HOLE, -#define FIX_FDT_SIZE SZ_1M - FIX_FDT_END, - FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, FIX_PTE, FIX_PMD, FIX_TEXT_POKE1, FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, + + __end_of_permanent_fixed_addresses, + /* + * Temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + */ +#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) +#define FIX_BTMAPS_SLOTS 7 +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + __end_of_fixed_addresses }; diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 3835c3295dc5..c025a746a148 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -14,6 +14,7 @@ #include <linux/types.h> #include <linux/pgtable.h> #include <asm/mmiowb.h> +#include <asm/early_ioremap.h> /* * MMIO access functions are separated out to break dependency cycles diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 967eacb01ab5..dabcf2cfb3dc 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -20,6 +20,8 @@ typedef struct { #endif } mm_context_t; +void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot); #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_MMU_H */ diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/include/asm/module.lds.h index 295ecfb341a2..4254ff2ff049 100644 --- a/arch/riscv/kernel/module.lds +++ b/arch/riscv/include/asm/module.lds.h @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2017 Andes Technology Corporation */ - +#ifdef CONFIG_MODULE_SECTIONS SECTIONS { .plt (NOLOAD) : { BYTE(0) } .got (NOLOAD) : { BYTE(0) } .got.plt (NOLOAD) : { BYTE(0) } } +#endif diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index eaea1f717010..183f1f4b2ae6 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -100,6 +100,10 @@ #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) +#define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) +#define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \ + | _PAGE_EXEC) #define PAGE_TABLE __pgprot(_PAGE_TABLE) @@ -464,6 +468,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl #define kern_addr_valid(addr) (1) /* FIXME */ extern void *dtb_early_va; +extern uintptr_t dtb_early_pa; void setup_bootmem(void); void paging_init(void); diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h new file mode 100644 index 000000000000..1595c5b60cfd --- /dev/null +++ b/arch/riscv/include/asm/sections.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#ifndef __ASM_SECTIONS_H +#define __ASM_SECTIONS_H + +#include <asm-generic/sections.h> + +extern char _start[]; +extern char _start_kernel[]; +extern char __init_data_begin[], __init_data_end[]; +extern char __init_text_begin[], __init_text_end[]; + +#endif /* __ASM_SECTIONS_H */ diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index 4c5bae7ca01c..b21f4bea6434 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -15,11 +15,15 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +int set_memory_rw_nx(unsigned long addr, int numpages); +void protect_kernel_text_data(void); #else static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +static inline void protect_kernel_text_data(void) {}; +static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; } #endif int set_direct_map_invalid_noflush(struct page *page); diff --git a/arch/riscv/include/asm/soc.h b/arch/riscv/include/asm/soc.h index 136a442ef876..6c8363b1f327 100644 --- a/arch/riscv/include/asm/soc.h +++ b/arch/riscv/include/asm/soc.h @@ -13,7 +13,7 @@ #define SOC_EARLY_INIT_DECLARE(name, compat, fn) \ static const struct of_device_id __soc_early_init__##name \ - __used __section(__soc_early_init_table) \ + __used __section("__soc_early_init_table") \ = { .compatible = compat, .data = fn } void soc_early_init(void); @@ -46,7 +46,7 @@ struct soc_builtin_dtb { } \ \ static const struct soc_builtin_dtb __soc_builtin_dtb__##name \ - __used __section(__soc_builtin_dtb_table) = \ + __used __section("__soc_builtin_dtb_table") = \ { \ .vendor_id = vendor, \ .arch_id = arch, \ diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h new file mode 100644 index 000000000000..470a65c4ccdc --- /dev/null +++ b/arch/riscv/include/asm/stacktrace.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_RISCV_STACKTRACE_H +#define _ASM_RISCV_STACKTRACE_H + +#include <linux/sched.h> +#include <asm/ptrace.h> + +struct stackframe { + unsigned long fp; + unsigned long ra; +}; + +extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, + bool (*fn)(void *, unsigned long), void *arg); + +#endif /* _ASM_RISCV_STACKTRACE_H */ diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 924af13f8555..5477e7ecb6e1 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -12,16 +12,16 @@ #define __HAVE_ARCH_MEMSET extern asmlinkage void *memset(void *, int, size_t); extern asmlinkage void *__memset(void *, int, size_t); - #define __HAVE_ARCH_MEMCPY extern asmlinkage void *memcpy(void *, const void *, size_t); extern asmlinkage void *__memcpy(void *, const void *, size_t); - +#define __HAVE_ARCH_MEMMOVE +extern asmlinkage void *memmove(void *, const void *, size_t); +extern asmlinkage void *__memmove(void *, const void *, size_t); /* For those files which don't want to check by kasan. */ #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) - #define memcpy(dst, src, len) __memcpy(dst, src, len) #define memset(s, c, n) __memset(s, c, n) - +#define memmove(dst, src, len) __memmove(dst, src, len) #endif #endif /* _ASM_RISCV_STRING_H */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 464a2bbc97ea..a390711129de 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -24,10 +24,6 @@ #include <asm/processor.h> #include <asm/csr.h> -typedef struct { - unsigned long seg; -} mm_segment_t; - /* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line @@ -39,7 +35,6 @@ typedef struct { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0=>preemptible, <0=>BUG */ - mm_segment_t addr_limit; /* * These stack pointers are overwritten on every system call or * exception. SP is also saved to the stack it can be recovered when @@ -59,7 +54,6 @@ struct thread_info { { \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .addr_limit = KERNEL_DS, \ } #endif /* !__ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index f56c66b3f5fe..c47e6b35c551 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -13,24 +13,6 @@ /* * User space memory access functions */ - -extern unsigned long __must_check __asm_copy_to_user(void __user *to, - const void *from, unsigned long n); -extern unsigned long __must_check __asm_copy_from_user(void *to, - const void __user *from, unsigned long n); - -static inline unsigned long -raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return __asm_copy_from_user(to, from, n); -} - -static inline unsigned long -raw_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return __asm_copy_to_user(to, from, n); -} - #ifdef CONFIG_MMU #include <linux/errno.h> #include <linux/compiler.h> @@ -44,29 +26,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) #define __disable_user_access() \ __asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory") -/* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(~0UL) -#define USER_DS MAKE_MM_SEG(TASK_SIZE) - -#define get_fs() (current_thread_info()->addr_limit) - -static inline void set_fs(mm_segment_t fs) -{ - current_thread_info()->addr_limit = fs; -} - -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) -#define user_addr_max() (get_fs().seg) - /** * access_ok: - Checks if a user space pointer is valid * @addr: User space pointer to start of block to check @@ -94,9 +53,7 @@ static inline void set_fs(mm_segment_t fs) */ static inline int __access_ok(unsigned long addr, unsigned long size) { - const mm_segment_t fs = get_fs(); - - return size <= fs.seg && addr <= fs.seg - size; + return size <= TASK_SIZE && addr <= TASK_SIZE - size; } /* @@ -125,7 +82,6 @@ static inline int __access_ok(unsigned long addr, unsigned long size) do { \ uintptr_t __tmp; \ __typeof__(x) __x; \ - __enable_user_access(); \ __asm__ __volatile__ ( \ "1:\n" \ " " insn " %1, %3\n" \ @@ -143,7 +99,6 @@ do { \ " .previous" \ : "+r" (err), "=&r" (__x), "=r" (__tmp) \ : "m" (*(ptr)), "i" (-EFAULT)); \ - __disable_user_access(); \ (x) = __x; \ } while (0) @@ -156,7 +111,6 @@ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u32 __lo, __hi; \ uintptr_t __tmp; \ - __enable_user_access(); \ __asm__ __volatile__ ( \ "1:\n" \ " lw %1, %4\n" \ @@ -180,12 +134,30 @@ do { \ "=r" (__tmp) \ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]), \ "i" (-EFAULT)); \ - __disable_user_access(); \ (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ (((u64)__hi << 32) | __lo))); \ } while (0) #endif /* CONFIG_64BIT */ +#define __get_user_nocheck(x, __gu_ptr, __gu_err) \ +do { \ + switch (sizeof(*__gu_ptr)) { \ + case 1: \ + __get_user_asm("lb", (x), __gu_ptr, __gu_err); \ + break; \ + case 2: \ + __get_user_asm("lh", (x), __gu_ptr, __gu_err); \ + break; \ + case 4: \ + __get_user_asm("lw", (x), __gu_ptr, __gu_err); \ + break; \ + case 8: \ + __get_user_8((x), __gu_ptr, __gu_err); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ +} while (0) /** * __get_user: - Get a simple variable from user space, with less checking. @@ -209,25 +181,15 @@ do { \ */ #define __get_user(x, ptr) \ ({ \ - register long __gu_err = 0; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + long __gu_err = 0; \ + \ __chk_user_ptr(__gu_ptr); \ - switch (sizeof(*__gu_ptr)) { \ - case 1: \ - __get_user_asm("lb", (x), __gu_ptr, __gu_err); \ - break; \ - case 2: \ - __get_user_asm("lh", (x), __gu_ptr, __gu_err); \ - break; \ - case 4: \ - __get_user_asm("lw", (x), __gu_ptr, __gu_err); \ - break; \ - case 8: \ - __get_user_8((x), __gu_ptr, __gu_err); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ + \ + __enable_user_access(); \ + __get_user_nocheck(x, __gu_ptr, __gu_err); \ + __disable_user_access(); \ + \ __gu_err; \ }) @@ -261,7 +223,6 @@ do { \ do { \ uintptr_t __tmp; \ __typeof__(*(ptr)) __x = x; \ - __enable_user_access(); \ __asm__ __volatile__ ( \ "1:\n" \ " " insn " %z3, %2\n" \ @@ -278,7 +239,6 @@ do { \ " .previous" \ : "+r" (err), "=r" (__tmp), "=m" (*(ptr)) \ : "rJ" (__x), "i" (-EFAULT)); \ - __disable_user_access(); \ } while (0) #ifdef CONFIG_64BIT @@ -290,7 +250,6 @@ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u64 __x = (__typeof__((x)-(x)))(x); \ uintptr_t __tmp; \ - __enable_user_access(); \ __asm__ __volatile__ ( \ "1:\n" \ " sw %z4, %2\n" \ @@ -312,10 +271,28 @@ do { \ "=m" (__ptr[__LSW]), \ "=m" (__ptr[__MSW]) \ : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \ - __disable_user_access(); \ } while (0) #endif /* CONFIG_64BIT */ +#define __put_user_nocheck(x, __gu_ptr, __pu_err) \ +do { \ + switch (sizeof(*__gu_ptr)) { \ + case 1: \ + __put_user_asm("sb", (x), __gu_ptr, __pu_err); \ + break; \ + case 2: \ + __put_user_asm("sh", (x), __gu_ptr, __pu_err); \ + break; \ + case 4: \ + __put_user_asm("sw", (x), __gu_ptr, __pu_err); \ + break; \ + case 8: \ + __put_user_8((x), __gu_ptr, __pu_err); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ +} while (0) /** * __put_user: - Write a simple value into user space, with less checking. @@ -338,25 +315,15 @@ do { \ */ #define __put_user(x, ptr) \ ({ \ - register long __pu_err = 0; \ __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + long __pu_err = 0; \ + \ __chk_user_ptr(__gu_ptr); \ - switch (sizeof(*__gu_ptr)) { \ - case 1: \ - __put_user_asm("sb", (x), __gu_ptr, __pu_err); \ - break; \ - case 2: \ - __put_user_asm("sh", (x), __gu_ptr, __pu_err); \ - break; \ - case 4: \ - __put_user_asm("sw", (x), __gu_ptr, __pu_err); \ - break; \ - case 8: \ - __put_user_8((x), __gu_ptr, __pu_err); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ + \ + __enable_user_access(); \ + __put_user_nocheck(x, __gu_ptr, __pu_err); \ + __disable_user_access(); \ + \ __pu_err; \ }) @@ -385,6 +352,24 @@ do { \ -EFAULT; \ }) + +unsigned long __must_check __asm_copy_to_user(void __user *to, + const void *from, unsigned long n); +unsigned long __must_check __asm_copy_from_user(void *to, + const void __user *from, unsigned long n); + +static inline unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return __asm_copy_from_user(to, from, n); +} + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return __asm_copy_to_user(to, from, n); +} + extern long strncpy_from_user(char *dest, const char __user *src, long count); extern long __must_check strlen_user(const char __user *str); @@ -476,6 +461,26 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) __ret; \ }) +#define HAVE_GET_KERNEL_NOFAULT + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + long __kr_err; \ + \ + __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + if (unlikely(__kr_err)) \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + long __kr_err; \ + \ + __put_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + if (unlikely(__kr_err)) \ + goto err_label; \ +} while (0) + #else /* CONFIG_MMU */ #include <asm-generic/uaccess.h> #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index d86cb17bbabe..32c73ba1d531 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -10,4 +10,28 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +/* + * The set of entries below represent more extensive information + * about the caches, in the form of two entry per cache type, + * one entry containing the cache size in bytes, and the other + * containing the cache line size in bytes in the bottom 16 bits + * and the cache associativity in the next 16 bits. + * + * The associativity is such that if N is the 16-bit value, the + * cache is N way set associative. A value if 0xffff means fully + * associative, a value of 1 means directly mapped. + * + * For all these fields, a value of 0 means that the information + * is not known. + */ +#define AT_L1I_CACHESIZE 40 +#define AT_L1I_CACHEGEOMETRY 41 +#define AT_L1D_CACHESIZE 42 +#define AT_L1D_CACHEGEOMETRY 43 +#define AT_L2_CACHESIZE 44 +#define AT_L2_CACHEGEOMETRY 45 + +/* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 7 + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index dc93710f0b2f..f6caf4d9ca15 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -55,4 +55,4 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -clean: +obj-$(CONFIG_EFI) += efi.o diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index db203442c08f..b79ffa3561fd 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -11,6 +11,8 @@ #include <asm/thread_info.h> #include <asm/ptrace.h> +void asm_offsets(void); + void asm_offsets(void) { OFFSET(TASK_THREAD_RA, task_struct, thread.ra); diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index bd0f122965c3..de59dd457b41 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -3,7 +3,6 @@ * Copyright (C) 2017 SiFive */ -#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/of.h> #include <linux/of_device.h> @@ -25,12 +24,84 @@ cache_get_priv_group(struct cacheinfo *this_leaf) return NULL; } -static void ci_leaf_init(struct cacheinfo *this_leaf, - struct device_node *node, - enum cache_type type, unsigned int level) +static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id()); + struct cacheinfo *this_leaf; + int index; + + for (index = 0; index < this_cpu_ci->num_leaves; index++) { + this_leaf = this_cpu_ci->info_list + index; + if (this_leaf->level == level && this_leaf->type == type) + return this_leaf; + } + + return NULL; +} + +uintptr_t get_cache_size(u32 level, enum cache_type type) +{ + struct cacheinfo *this_leaf = get_cacheinfo(level, type); + + return this_leaf ? this_leaf->size : 0; +} + +uintptr_t get_cache_geometry(u32 level, enum cache_type type) +{ + struct cacheinfo *this_leaf = get_cacheinfo(level, type); + + return this_leaf ? (this_leaf->ways_of_associativity << 16 | + this_leaf->coherency_line_size) : + 0; +} + +static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, + unsigned int level, unsigned int size, + unsigned int sets, unsigned int line_size) { this_leaf->level = level; this_leaf->type = type; + this_leaf->size = size; + this_leaf->number_of_sets = sets; + this_leaf->coherency_line_size = line_size; + + /* + * If the cache is fully associative, there is no need to + * check the other properties. + */ + if (sets == 1) + return; + + /* + * Set the ways number for n-ways associative, make sure + * all properties are big than zero. + */ + if (sets > 0 && size > 0 && line_size > 0) + this_leaf->ways_of_associativity = (size / sets) / line_size; +} + +static void fill_cacheinfo(struct cacheinfo **this_leaf, + struct device_node *node, unsigned int level) +{ + unsigned int size, sets, line_size; + + if (!of_property_read_u32(node, "cache-size", &size) && + !of_property_read_u32(node, "cache-block-size", &line_size) && + !of_property_read_u32(node, "cache-sets", &sets)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size); + } + + if (!of_property_read_u32(node, "i-cache-size", &size) && + !of_property_read_u32(node, "i-cache-sets", &sets) && + !of_property_read_u32(node, "i-cache-block-size", &line_size)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size); + } + + if (!of_property_read_u32(node, "d-cache-size", &size) && + !of_property_read_u32(node, "d-cache-sets", &sets) && + !of_property_read_u32(node, "d-cache-block-size", &line_size)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size); + } } static int __init_cache_level(unsigned int cpu) @@ -83,29 +154,24 @@ static int __populate_cache_leaves(unsigned int cpu) struct device_node *prev = NULL; int levels = 1, level = 1; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + /* Level 1 caches in cpu node */ + fill_cacheinfo(&this_leaf, np, level); + /* Next level caches in cache nodes */ prev = np; while ((np = of_find_next_cache_node(np))) { of_node_put(prev); prev = np; + if (!of_device_is_compatible(np, "cache")) break; if (of_property_read_u32(np, "cache-level", &level)) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) - ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); + + fill_cacheinfo(&this_leaf, np, level); + levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index 0ec22354018c..1985884fe829 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -15,8 +15,8 @@ const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -void *__cpu_up_stack_pointer[NR_CPUS] __section(.data); -void *__cpu_up_task_pointer[NR_CPUS] __section(.data); +void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); +void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); extern const struct cpu_operations cpu_ops_sbi; extern const struct cpu_operations cpu_ops_spinwait; diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S new file mode 100644 index 000000000000..8e733aa48ba6 --- /dev/null +++ b/arch/riscv/kernel/efi-header.S @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * Adapted from arch/arm64/kernel/efi-header.S + */ + +#include <linux/pe.h> +#include <linux/sizes.h> + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: +#ifdef CONFIG_64BIT + .short IMAGE_FILE_MACHINE_RISCV64 // Machine +#else + .short IMAGE_FILE_MACHINE_RISCV32 // Machine +#endif + .short section_count // NumberOfSections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 0 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics + +optional_header: +#ifdef CONFIG_64BIT + .short PE_OPT_MAGIC_PE32PLUS // PE32+ format +#else + .short PE_OPT_MAGIC_PE32 // PE32 format +#endif + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long __pecoff_text_end - efi_header_end // SizeOfCode + .long __pecoff_data_virt_size // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long __efistub_efi_pe_entry - _start // AddressOfEntryPoint + .long efi_header_end - _start // BaseOfCode +#ifdef CONFIG_32BIT + .long __pecoff_text_end - _start // BaseOfData +#endif + +extra_header_fields: + .quad 0 // ImageBase + .long PECOFF_SECTION_ALIGNMENT // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion + .short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _end - _start // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long efi_header_end - _start // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long (section_table - .) / 8 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + .ascii ".text\0\0\0" + .long __pecoff_text_end - efi_header_end // VirtualSize + .long efi_header_end - _start // VirtualAddress + .long __pecoff_text_end - efi_header_end // SizeOfRawData + .long efi_header_end - _start // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE // Characteristics + + .ascii ".data\0\0\0" + .long __pecoff_data_virt_size // VirtualSize + .long __pecoff_text_end - _start // VirtualAddress + .long __pecoff_data_raw_size // SizeOfRawData + .long __pecoff_text_end - _start // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE // Characteristics + + .set section_count, (. - section_table) / 40 + + .balign 0x1000 +efi_header_end: + .endm diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c new file mode 100644 index 000000000000..024159298231 --- /dev/null +++ b/arch/riscv/kernel/efi.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * Adapted from arch/arm64/kernel/efi.c + */ + +#include <linux/efi.h> +#include <linux/init.h> + +#include <asm/efi.h> +#include <asm/pgtable.h> +#include <asm/pgtable-bits.h> + +/* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. Also take the new (optional) RO/XP bits into account. + */ +static __init pgprot_t efimem_to_pgprot_map(efi_memory_desc_t *md) +{ + u64 attr = md->attribute; + u32 type = md->type; + + if (type == EFI_MEMORY_MAPPED_IO) + return PAGE_KERNEL; + + /* R-- */ + if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == + (EFI_MEMORY_XP | EFI_MEMORY_RO)) + return PAGE_KERNEL_READ; + + /* R-X */ + if (attr & EFI_MEMORY_RO) + return PAGE_KERNEL_READ_EXEC; + + /* RW- */ + if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) == + EFI_MEMORY_XP) || + type != EFI_RUNTIME_SERVICES_CODE) + return PAGE_KERNEL; + + /* RWX */ + return PAGE_KERNEL_EXEC; +} + +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) +{ + pgprot_t prot = __pgprot(pgprot_val(efimem_to_pgprot_map(md)) & + ~(_PAGE_GLOBAL)); + int i; + + /* RISC-V maps one page at a time */ + for (i = 0; i < md->num_pages; i++) + create_pgd_mapping(mm->pgd, md->virt_addr + i * PAGE_SIZE, + md->phys_addr + i * PAGE_SIZE, + PAGE_SIZE, prot); + return 0; +} + +static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) +{ + efi_memory_desc_t *md = data; + pte_t pte = READ_ONCE(*ptep); + unsigned long val; + + if (md->attribute & EFI_MEMORY_RO) { + val = pte_val(pte) & ~_PAGE_WRITE; + val = pte_val(pte) | _PAGE_READ; + pte = __pte(val); + } + if (md->attribute & EFI_MEMORY_XP) { + val = pte_val(pte) & ~_PAGE_EXEC; + pte = __pte(val); + } + set_pte(ptep, pte); + + return 0; +} + +int __init efi_set_mapping_permissions(struct mm_struct *mm, + efi_memory_desc_t *md) +{ + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + /* + * Calling apply_to_page_range() is only safe on regions that are + * guaranteed to be mapped down to pages. Since we are only called + * for regions that have been mapped using efi_create_mapping() above + * (and this is checked by the generic Memory Attributes table parsing + * routines), there is no need to check that again here. + */ + return apply_to_page_range(mm, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + set_permissions, md); +} diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 0a4e81b8dc79..45dbdae930bf 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -3,7 +3,6 @@ * Copyright (C) 2012 Regents of the University of California */ -#include <asm/thread_info.h> #include <asm/asm-offsets.h> #include <asm/asm.h> #include <linux/init.h> @@ -13,6 +12,7 @@ #include <asm/csr.h> #include <asm/hwcap.h> #include <asm/image.h> +#include "efi-header.S" __HEAD ENTRY(_start) @@ -22,10 +22,18 @@ ENTRY(_start) * Do not modify it without modifying the structure and all bootloaders * that expects this header format!! */ +#ifdef CONFIG_EFI + /* + * This instruction decodes to "MZ" ASCII required by UEFI. + */ + c.li s4,-13 + j _start_kernel +#else /* jump to start kernel */ j _start_kernel /* reserved */ .word 0 +#endif .balign 8 #if __riscv_xlen == 64 /* Image load offset(2MB) from start of RAM */ @@ -43,7 +51,14 @@ ENTRY(_start) .ascii RISCV_IMAGE_MAGIC .balign 4 .ascii RISCV_IMAGE_MAGIC2 +#ifdef CONFIG_EFI + .word pe_head_start - _start +pe_head_start: + + __EFI_PE_HEADER +#else .word 0 +#endif .align 2 #ifdef CONFIG_MMU @@ -162,7 +177,6 @@ setup_trap_vector: END(_start) - __INIT ENTRY(_start_kernel) /* Mask all interrupts */ csrw CSR_IE, zero @@ -259,7 +273,6 @@ clear_bss_done: #endif /* Start the kernel */ call soc_early_init - call parse_dtb tail start_kernel .Lsecondary_start: diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index 105fb0496b24..b48dda3d04f6 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -16,6 +16,4 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa); extern void *__cpu_up_stack_pointer[]; extern void *__cpu_up_task_pointer[]; -void __init parse_dtb(void); - #endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h new file mode 100644 index 000000000000..8c212efb37a6 --- /dev/null +++ b/arch/riscv/kernel/image-vars.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * Linker script variables to be set after section resolution, as + * ld.lld does not like variables assigned before SECTIONS is processed. + * Based on arch/arm64/kerne/image-vars.h + */ +#ifndef __RISCV_KERNEL_IMAGE_VARS_H +#define __RISCV_KERNEL_IMAGE_VARS_H + +#ifndef LINKER_SCRIPT +#error This file should only be included in vmlinux.lds.S +#endif + +#ifdef CONFIG_EFI + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = memcmp; +__efistub_memchr = memchr; +__efistub_memcpy = memcpy; +__efistub_memmove = memmove; +__efistub_memset = memset; +__efistub_strlen = strlen; +__efistub_strnlen = strnlen; +__efistub_strcmp = strcmp; +__efistub_strncmp = strncmp; +__efistub_strrchr = strrchr; + +#ifdef CONFIG_KASAN +__efistub___memcpy = memcpy; +__efistub___memmove = memmove; +__efistub___memset = memset; +#endif + +__efistub__start = _start; +__efistub__start_kernel = _start_kernel; +__efistub__end = _end; +__efistub__edata = _edata; +__efistub_screen_info = screen_info; + +#endif + +#endif /* __RISCV_KERNEL_IMAGE_VARS_H */ diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index cf190197a22f..0bb1854dce83 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -4,11 +4,7 @@ #include <linux/perf_event.h> #include <linux/uaccess.h> -/* Kernel callchain */ -struct stackframe { - unsigned long fp; - unsigned long ra; -}; +#include <asm/stacktrace.h> /* * Get the return address for a single stackframe and return a pointer to the @@ -74,13 +70,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, fp = user_backtrace(entry, fp, 0); } -bool fill_callchain(unsigned long pc, void *entry) +static bool fill_callchain(void *entry, unsigned long pc) { return perf_callchain_store(entry, pc); } -void notrace walk_stackframe(struct task_struct *task, - struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg); void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 2b97c493427c..19225ec65db6 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -84,7 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long pc, } regs->epc = pc; regs->sp = sp; - set_fs(USER_DS); } void flush_thread(void) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 450492e1cb4e..5ab1c7e1a6ed 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -11,5 +11,7 @@ */ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 2c6dd329312b..a834deab31d3 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -4,6 +4,8 @@ * Chen Liqin <liqin.chen@sunplusct.com> * Lennox Wu <lennox.wu@sunplusct.com> * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2020 FORTH-ICS/CARV + * Nick Kossifidis <mick@ics.forth.gr> */ #include <linux/init.h> @@ -17,19 +19,23 @@ #include <linux/sched/task.h> #include <linux/swiotlb.h> #include <linux/smp.h> +#include <linux/efi.h> #include <asm/cpu_ops.h> +#include <asm/early_ioremap.h> #include <asm/setup.h> +#include <asm/set_memory.h> #include <asm/sections.h> #include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/thread_info.h> #include <asm/kasan.h> +#include <asm/efi.h> #include "head.h" -#ifdef CONFIG_DUMMY_CONSOLE -struct screen_info screen_info = { +#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI) +struct screen_info screen_info __section(".data") = { .orig_video_lines = 30, .orig_video_cols = 80, .orig_video_mode = 0, @@ -44,12 +50,170 @@ struct screen_info screen_info = { * This is used before the kernel initializes the BSS so it can't be in the * BSS. */ -atomic_t hart_lottery __section(.sdata); +atomic_t hart_lottery __section(".sdata"); unsigned long boot_cpu_hartid; static DEFINE_PER_CPU(struct cpu, cpu_devices); -void __init parse_dtb(void) +/* + * Place kernel memory regions on the resource tree so that + * kexec-tools can retrieve them from /proc/iomem. While there + * also add "System RAM" regions for compatibility with other + * archs, and the rest of the known regions for completeness. + */ +static struct resource code_res = { .name = "Kernel code", }; +static struct resource data_res = { .name = "Kernel data", }; +static struct resource rodata_res = { .name = "Kernel rodata", }; +static struct resource bss_res = { .name = "Kernel bss", }; + +static int __init add_resource(struct resource *parent, + struct resource *res) { + int ret = 0; + + ret = insert_resource(parent, res); + if (ret < 0) { + pr_err("Failed to add a %s resource at %llx\n", + res->name, (unsigned long long) res->start); + return ret; + } + + return 1; +} + +static int __init add_kernel_resources(struct resource *res) +{ + int ret = 0; + + /* + * The memory region of the kernel image is continuous and + * was reserved on setup_bootmem, find it here and register + * it as a resource, then register the various segments of + * the image as child nodes + */ + if (!(res->start <= code_res.start && res->end >= data_res.end)) + return 0; + + res->name = "Kernel image"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + /* + * We removed a part of this region on setup_bootmem so + * we need to expand the resource for the bss to fit in. + */ + res->end = bss_res.end; + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + return ret; + + ret = add_resource(res, &code_res); + if (ret < 0) + return ret; + + ret = add_resource(res, &rodata_res); + if (ret < 0) + return ret; + + ret = add_resource(res, &data_res); + if (ret < 0) + return ret; + + ret = add_resource(res, &bss_res); + + return ret; +} + +static void __init init_resources(void) +{ + struct memblock_region *region = NULL; + struct resource *res = NULL; + int ret = 0; + + code_res.start = __pa_symbol(_text); + code_res.end = __pa_symbol(_etext) - 1; + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + rodata_res.start = __pa_symbol(__start_rodata); + rodata_res.end = __pa_symbol(__end_rodata) - 1; + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + data_res.start = __pa_symbol(_data); + data_res.end = __pa_symbol(_edata) - 1; + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + bss_res.start = __pa_symbol(__bss_start); + bss_res.end = __pa_symbol(__bss_stop) - 1; + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + /* + * Start by adding the reserved regions, if they overlap + * with /memory regions, insert_resource later on will take + * care of it. + */ + for_each_reserved_mem_region(region) { + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); + if (!res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + + res->name = "Reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; + + ret = add_kernel_resources(res); + if (ret < 0) + goto error; + else if (ret) + continue; + + /* + * Ignore any other reserved regions within + * system memory. + */ + if (memblock_is_memory(res->start)) + continue; + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + goto error; + } + + /* Add /memory regions to the resource tree */ + for_each_mem_region(region) { + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); + if (!res) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct resource)); + + if (unlikely(memblock_is_nomap(region))) { + res->name = "Reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } + + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + goto error; + } + + return; + + error: + memblock_free((phys_addr_t) res, sizeof(struct resource)); + /* Better an empty resource tree than an inconsistent one */ + release_child_resources(&iomem_resource); +} + + +static void __init parse_dtb(void) +{ + /* Early scan of device tree from init memory */ if (early_init_dt_scan(dtb_early_va)) return; @@ -62,6 +226,7 @@ void __init parse_dtb(void) void __init setup_arch(char **cmdline_p) { + parse_dtb(); init_mm.start_code = (unsigned long) _stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; @@ -69,16 +234,27 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_ioremap_setup(); parse_early_param(); + efi_init(); setup_bootmem(); paging_init(); + init_resources(); #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else - unflatten_device_tree(); + if (early_init_dt_verify(__va(dtb_early_pa))) + unflatten_device_tree(); + else + pr_err("No DTB found in kernel mappings\n"); #endif + if (IS_ENABLED(CONFIG_RISCV_SBI)) + sbi_init(); + + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + protect_kernel_text_data(); #ifdef CONFIG_SWIOTLB swiotlb_init(1); #endif @@ -87,10 +263,6 @@ void __init setup_arch(char **cmdline_p) kasan_init(); #endif -#if IS_ENABLED(CONFIG_RISCV_SBI) - sbi_init(); -#endif - #ifdef CONFIG_SMP setup_smp(); #endif @@ -112,3 +284,12 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); + +void free_initmem(void) +{ + unsigned long init_begin = (unsigned long)__init_begin; + unsigned long init_end = (unsigned long)__init_end; + + set_memory_rw_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT); + free_initmem_default(POISON_FREE_INITMEM); +} diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index e996e08f1061..bc6841867b51 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -313,8 +313,6 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs, if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); - if (thread_info_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); + if (thread_info_flags & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); - } } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 595342910c3f..48b870a685b3 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -12,17 +12,14 @@ #include <linux/stacktrace.h> #include <linux/ftrace.h> +#include <asm/stacktrace.h> + register unsigned long sp_in_global __asm__("sp"); #ifdef CONFIG_FRAME_POINTER -struct stackframe { - unsigned long fp; - unsigned long ra; -}; - void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, - bool (*fn)(unsigned long, void *), void *arg) + bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; @@ -46,7 +43,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || fn(pc, arg))) + if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) break; /* Validate frame pointer */ @@ -66,7 +63,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, #else /* !CONFIG_FRAME_POINTER */ void notrace walk_stackframe(struct task_struct *task, - struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg) + struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long sp, pc; unsigned long *ksp; @@ -88,7 +85,7 @@ void notrace walk_stackframe(struct task_struct *task, ksp = (unsigned long *)sp; while (!kstack_end(ksp)) { - if (__kernel_text_address(pc) && unlikely(fn(pc, arg))) + if (__kernel_text_address(pc) && unlikely(!fn(arg, pc))) break; pc = (*ksp++) - 0x4; } @@ -96,13 +93,12 @@ void notrace walk_stackframe(struct task_struct *task, #endif /* CONFIG_FRAME_POINTER */ - -static bool print_trace_address(unsigned long pc, void *arg) +static bool print_trace_address(void *arg, unsigned long pc) { const char *loglvl = arg; print_ip_sym(loglvl, pc); - return false; + return true; } void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) @@ -111,14 +107,14 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) walk_stackframe(task, NULL, print_trace_address, (void *)loglvl); } -static bool save_wchan(unsigned long pc, void *arg) +static bool save_wchan(void *arg, unsigned long pc) { if (!in_sched_functions(pc)) { unsigned long *p = arg; *p = pc; - return true; + return false; } - return false; + return true; } unsigned long get_wchan(struct task_struct *task) @@ -130,42 +126,12 @@ unsigned long get_wchan(struct task_struct *task) return pc; } - #ifdef CONFIG_STACKTRACE -static bool __save_trace(unsigned long pc, void *arg, bool nosched) -{ - struct stack_trace *trace = arg; - - if (unlikely(nosched && in_sched_functions(pc))) - return false; - if (unlikely(trace->skip > 0)) { - trace->skip--; - return false; - } - - trace->entries[trace->nr_entries++] = pc; - return (trace->nr_entries >= trace->max_entries); -} - -static bool save_trace(unsigned long pc, void *arg) -{ - return __save_trace(pc, arg, false); -} - -/* - * Save stack-backtrace addresses into a stack_trace buffer. - */ -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - walk_stackframe(tsk, NULL, save_trace, trace); -} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -void save_stack_trace(struct stack_trace *trace) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { - save_stack_trace_tsk(NULL, trace); + walk_stackframe(task, regs, consume_entry, cookie); } -EXPORT_SYMBOL_GPL(save_stack_trace); #endif /* CONFIG_STACKTRACE */ diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 478e7338ddc1..7d6a94d45ec9 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -49,7 +49,7 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE # refer to these symbols in the kernel code rather than hand-coded addresses. SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id -Wl,--hash-style=both + -Wl,--build-id=sha1 -Wl,--hash-style=both $(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE $(call if_changed,vdsold) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 34d00d9e6eac..de03cb22d0e9 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -10,6 +10,7 @@ #include <asm/cache.h> #include <asm/thread_info.h> #include <asm/set_memory.h> +#include "image-vars.h" #include <linux/sizes.h> OUTPUT_ARCH(riscv) @@ -17,6 +18,9 @@ ENTRY(_start) jiffies = jiffies_64; +PECOFF_SECTION_ALIGNMENT = 0x1000; +PECOFF_FILE_ALIGNMENT = 0x200; + SECTIONS { /* Beginning of code and text segment */ @@ -25,8 +29,30 @@ SECTIONS HEAD_TEXT_SECTION . = ALIGN(PAGE_SIZE); + .text : { + _text = .; + _stext = .; + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + KPROBES_TEXT + ENTRY_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.fixup) + _etext = .; + } + + . = ALIGN(SECTION_ALIGN); __init_begin = .; - INIT_TEXT_SECTION(PAGE_SIZE) + __init_text_begin = .; + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) ALIGN(SECTION_ALIGN) { \ + _sinittext = .; \ + INIT_TEXT \ + _einittext = .; \ + } + . = ALIGN(8); __soc_early_init_table : { __soc_early_init_table_start = .; @@ -43,30 +69,28 @@ SECTIONS { EXIT_TEXT } + + __init_text_end = .; + . = ALIGN(SECTION_ALIGN); +#ifdef CONFIG_EFI + . = ALIGN(PECOFF_SECTION_ALIGNMENT); + __pecoff_text_end = .; +#endif + /* Start of init data section */ + __init_data_begin = .; + INIT_DATA_SECTION(16) .exit.data : { EXIT_DATA } PERCPU_SECTION(L1_CACHE_BYTES) - __init_end = .; - . = ALIGN(SECTION_ALIGN); - .text : { - _text = .; - _stext = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - ENTRY_TEXT - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - *(.fixup) - _etext = .; + .rel.dyn : { + *(.rel.dyn*) } - INIT_DATA_SECTION(16) + __init_data_end = .; + __init_end = .; /* Start of data section */ _sdata = .; @@ -84,20 +108,27 @@ SECTIONS .sdata : { __global_pointer$ = . + 0x800; *(.sdata*) - /* End of data section */ - _edata = .; } - BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) +#ifdef CONFIG_EFI + .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } + __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end); +#endif - .rel.dyn : { - *(.rel.dyn*) - } + /* End of data section */ + _edata = .; + + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) +#ifdef CONFIG_EFI + . = ALIGN(PECOFF_SECTION_ALIGNMENT); + __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end); +#endif _end = .; STABS_DEBUG DWARF_DEBUG + ELF_DETAILS DISCARDS } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 0d0db80800c4..ac6171e9c19e 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -2,5 +2,6 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o -lib-y += uaccess.o +lib-y += memmove.o +lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S new file mode 100644 index 000000000000..07d1d2152ba5 --- /dev/null +++ b/arch/riscv/lib/memmove.S @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/asm.h> + +ENTRY(__memmove) +WEAK(memmove) + move t0, a0 + move t1, a1 + + beq a0, a1, exit_memcpy + beqz a2, exit_memcpy + srli t2, a2, 0x2 + + slt t3, a0, a1 + beqz t3, do_reverse + + andi a2, a2, 0x3 + li t4, 1 + beqz t2, byte_copy + +word_copy: + lw t3, 0(a1) + addi t2, t2, -1 + addi a1, a1, 4 + sw t3, 0(a0) + addi a0, a0, 4 + bnez t2, word_copy + beqz a2, exit_memcpy + j byte_copy + +do_reverse: + add a0, a0, a2 + add a1, a1, a2 + andi a2, a2, 0x3 + li t4, -1 + beqz t2, reverse_byte_copy + +reverse_word_copy: + addi a1, a1, -4 + addi t2, t2, -1 + lw t3, 0(a1) + addi a0, a0, -4 + sw t3, 0(a0) + bnez t2, reverse_word_copy + beqz a2, exit_memcpy + +reverse_byte_copy: + addi a0, a0, -1 + addi a1, a1, -1 + +byte_copy: + lb t3, 0(a1) + addi a2, a2, -1 + sb t3, 0(a0) + add a1, a1, t4 + add a0, a0, t4 + bnez a2, byte_copy + +exit_memcpy: + move a0, t0 + move a1, t1 + ret +END(__memmove) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 716d64e36f83..1359e21c0c62 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -19,6 +19,167 @@ #include "../kernel/head.h" +static inline void no_context(struct pt_regs *regs, unsigned long addr) +{ + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + bust_spinlocks(1); + pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", + (addr < PAGE_SIZE) ? "NULL pointer dereference" : + "paging request", addr); + die(regs, "Oops"); + do_exit(SIGKILL); +} + +static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) +{ + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + pagefault_out_of_memory(); + return; + } else if (fault & VM_FAULT_SIGBUS) { + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + do_trap(regs, SIGBUS, BUS_ADRERR, addr); + return; + } + BUG(); +} + +static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) +{ + /* + * Something tried to access memory that isn't in our memory map. + * Fix it, but check if it's kernel or user first. + */ + mmap_read_unlock(mm); + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + do_trap(regs, SIGSEGV, code, addr); + return; + } + + no_context(regs, addr); +} + +static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) +{ + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + p4d_t *p4d, *p4d_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + int index; + + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) + return do_trap(regs, SIGSEGV, code, addr); + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk->active_mm->pgd" here. + * We might be inside an interrupt in the middle + * of a task switch. + */ + index = pgd_index(addr); + pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) { + no_context(regs, addr); + return; + } + set_pgd(pgd, *pgd_k); + + p4d = p4d_offset(pgd, addr); + p4d_k = p4d_offset(pgd_k, addr); + if (!p4d_present(*p4d_k)) { + no_context(regs, addr); + return; + } + + pud = pud_offset(p4d, addr); + pud_k = pud_offset(p4d_k, addr); + if (!pud_present(*pud_k)) { + no_context(regs, addr); + return; + } + + /* + * Since the vmalloc area is global, it is unnecessary + * to copy individual PTEs + */ + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); + if (!pmd_present(*pmd_k)) { + no_context(regs, addr); + return; + } + set_pmd(pmd, *pmd_k); + + /* + * Make sure the actual PTE exists as well to + * catch kernel vmalloc-area accesses to non-mapped + * addresses. If we don't do this, this will just + * silently loop forever. + */ + pte_k = pte_offset_kernel(pmd_k, addr); + if (!pte_present(*pte_k)) { + no_context(regs, addr); + return; + } + + /* + * The kernel assumes that TLBs don't cache invalid + * entries, but in RISC-V, SFENCE.VMA specifies an + * ordering constraint, not a cache flush; it is + * necessary even after writing invalid entries. + */ + local_flush_tlb_page(addr); +} + +static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) +{ + switch (cause) { + case EXC_INST_PAGE_FAULT: + if (!(vma->vm_flags & VM_EXEC)) { + return true; + } + break; + case EXC_LOAD_PAGE_FAULT: + if (!(vma->vm_flags & VM_READ)) { + return true; + } + break; + case EXC_STORE_PAGE_FAULT: + if (!(vma->vm_flags & VM_WRITE)) { + return true; + } + break; + default: + panic("%s: unhandled cause %lu", __func__, cause); + } + return false; +} + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. @@ -48,8 +209,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) - goto vmalloc_fault; + if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) { + vmalloc_fault(regs, code, addr); + return; + } /* Enable interrupts if they were enabled in the parent context. */ if (likely(regs->status & SR_PIE)) @@ -59,25 +222,37 @@ asmlinkage void do_page_fault(struct pt_regs *regs) * If we're in an interrupt, have no user context, or are running * in an atomic region, then we must not take the fault. */ - if (unlikely(faulthandler_disabled() || !mm)) - goto no_context; + if (unlikely(faulthandler_disabled() || !mm)) { + no_context(regs, addr); + return; + } if (user_mode(regs)) flags |= FAULT_FLAG_USER; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + if (cause == EXC_STORE_PAGE_FAULT) + flags |= FAULT_FLAG_WRITE; + else if (cause == EXC_INST_PAGE_FAULT) + flags |= FAULT_FLAG_INSTRUCTION; retry: mmap_read_lock(mm); vma = find_vma(mm, addr); - if (unlikely(!vma)) - goto bad_area; + if (unlikely(!vma)) { + bad_area(regs, mm, code, addr); + return; + } if (likely(vma->vm_start <= addr)) goto good_area; - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) - goto bad_area; - if (unlikely(expand_stack(vma, addr))) - goto bad_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, mm, code, addr); + return; + } + if (unlikely(expand_stack(vma, addr))) { + bad_area(regs, mm, code, addr); + return; + } /* * Ok, we have a good vm_area for this memory access, so @@ -86,22 +261,9 @@ retry: good_area: code = SEGV_ACCERR; - switch (cause) { - case EXC_INST_PAGE_FAULT: - if (!(vma->vm_flags & VM_EXEC)) - goto bad_area; - break; - case EXC_LOAD_PAGE_FAULT: - if (!(vma->vm_flags & VM_READ)) - goto bad_area; - break; - case EXC_STORE_PAGE_FAULT: - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - flags |= FAULT_FLAG_WRITE; - break; - default: - panic("%s: unhandled cause %lu", __func__, cause); + if (unlikely(access_error(cause, vma))) { + bad_area(regs, mm, code, addr); + return; } /* @@ -119,144 +281,22 @@ good_area: if (fault_signal_pending(fault, regs)) return; - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_RETRY) { - flags |= FAULT_FLAG_TRIED; + if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) { + flags |= FAULT_FLAG_TRIED; - /* - * No need to mmap_read_unlock(mm) as we would - * have already released it in __lock_page_or_retry - * in mm/filemap.c. - */ - goto retry; - } + /* + * No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; } mmap_read_unlock(mm); - return; - /* - * Something tried to access memory that isn't in our memory map. - * Fix it, but check if it's kernel or user first. - */ -bad_area: - mmap_read_unlock(mm); - /* User mode accesses just cause a SIGSEGV */ - if (user_mode(regs)) { - do_trap(regs, SIGSEGV, code, addr); + if (unlikely(fault & VM_FAULT_ERROR)) { + mm_fault_error(regs, addr, fault); return; } - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - /* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - bust_spinlocks(1); - pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", - (addr < PAGE_SIZE) ? "NULL pointer dereference" : - "paging request", addr); - die(regs, "Oops"); - do_exit(SIGKILL); - - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ -out_of_memory: - mmap_read_unlock(mm); - if (!user_mode(regs)) - goto no_context; - pagefault_out_of_memory(); - return; - -do_sigbus: - mmap_read_unlock(mm); - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - goto no_context; - do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; - -vmalloc_fault: - { - pgd_t *pgd, *pgd_k; - pud_t *pud, *pud_k; - p4d_t *p4d, *p4d_k; - pmd_t *pmd, *pmd_k; - pte_t *pte_k; - int index; - - /* User mode accesses just cause a SIGSEGV */ - if (user_mode(regs)) - return do_trap(regs, SIGSEGV, code, addr); - - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk->active_mm->pgd" here. - * We might be inside an interrupt in the middle - * of a task switch. - */ - index = pgd_index(addr); - pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - goto no_context; - set_pgd(pgd, *pgd_k); - - p4d = p4d_offset(pgd, addr); - p4d_k = p4d_offset(pgd_k, addr); - if (!p4d_present(*p4d_k)) - goto no_context; - - pud = pud_offset(p4d, addr); - pud_k = pud_offset(p4d_k, addr); - if (!pud_present(*pud_k)) - goto no_context; - - /* - * Since the vmalloc area is global, it is unnecessary - * to copy individual PTEs - */ - pmd = pmd_offset(pud, addr); - pmd_k = pmd_offset(pud_k, addr); - if (!pmd_present(*pmd_k)) - goto no_context; - set_pmd(pmd, *pmd_k); - - /* - * Make sure the actual PTE exists as well to - * catch kernel vmalloc-area accesses to non-mapped - * addresses. If we don't do this, this will just - * silently loop forever. - */ - pte_k = pte_offset_kernel(pmd_k, addr); - if (!pte_present(*pte_k)) - goto no_context; - - /* - * The kernel assumes that TLBs don't cache invalid - * entries, but in RISC-V, SFENCE.VMA specifies an - * ordering constraint, not a cache flush; it is - * necessary even after writing invalid entries. - */ - local_flush_tlb_page(addr); - - return; - } } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f750e012dbe5..87c305c566ac 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -13,6 +13,7 @@ #include <linux/of_fdt.h> #include <linux/libfdt.h> #include <linux/set_memory.h> +#include <linux/dma-map-ops.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -28,15 +29,27 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] EXPORT_SYMBOL(empty_zero_page); extern char _start[]; -void *dtb_early_va; +#define DTB_EARLY_BASE_VA PGDIR_SIZE +void *dtb_early_va __initdata; +uintptr_t dtb_early_pa __initdata; + +struct pt_alloc_ops { + pte_t *(*get_pte_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pte)(uintptr_t va); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_t *(*get_pmd_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pmd)(uintptr_t va); +#endif +}; + +static phys_addr_t dma32_phys_limit __ro_after_init; static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, - (unsigned long) PFN_PHYS(max_low_pfn))); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; @@ -141,25 +154,23 @@ disable: } #endif /* CONFIG_BLK_DEV_INITRD */ -static phys_addr_t dtb_early_pa __initdata; - void __init setup_bootmem(void) { - struct memblock_region *reg; phys_addr_t mem_size = 0; phys_addr_t total_mem = 0; - phys_addr_t mem_start, end = 0; + phys_addr_t mem_start, start, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); + u64 i; /* Find the memory region containing the kernel */ - for_each_memblock(memory, reg) { - end = reg->base + reg->size; + for_each_mem_range(i, &start, &end) { + phys_addr_t size = end - start; if (!total_mem) - mem_start = reg->base; - if (reg->base <= vmlinux_start && vmlinux_end <= end) - BUG_ON(reg->size == 0); - total_mem = total_mem + reg->size; + mem_start = start; + if (start <= vmlinux_start && vmlinux_end <= end) + BUG_ON(size == 0); + total_mem = total_mem + size; } /* @@ -176,6 +187,7 @@ void __init setup_bootmem(void) max_pfn = PFN_DOWN(memblock_end_of_DRAM()); max_low_pfn = max_pfn; + dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn); #ifdef CONFIG_BLK_DEV_INITRD @@ -189,20 +201,14 @@ void __init setup_bootmem(void) memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); early_init_fdt_scan_reserved_mem(); + dma_contiguous_reserve(dma32_phys_limit); memblock_allow_resize(); memblock_dump_all(); - - for_each_memblock(memory, reg) { - unsigned long start_pfn = memblock_region_memory_base_pfn(reg); - unsigned long end_pfn = memblock_region_memory_end_pfn(reg); - - memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), - &memblock.memory, 0); - } } #ifdef CONFIG_MMU +static struct pt_alloc_ops pt_ops; + unsigned long va_pa_offset; EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; @@ -211,7 +217,6 @@ EXPORT_SYMBOL(pfn_base); pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; -static bool mmu_enabled; #define MAX_EARLY_MAPPING_SIZE SZ_128M @@ -233,27 +238,46 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) local_flush_tlb_page(addr); } -static pte_t *__init get_pte_virt(phys_addr_t pa) +static inline pte_t *__init get_pte_virt_early(phys_addr_t pa) { - if (mmu_enabled) { - clear_fixmap(FIX_PTE); - return (pte_t *)set_fixmap_offset(FIX_PTE, pa); - } else { - return (pte_t *)((uintptr_t)pa); - } + return (pte_t *)((uintptr_t)pa); +} + +static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_PTE); + return (pte_t *)set_fixmap_offset(FIX_PTE, pa); } -static phys_addr_t __init alloc_pte(uintptr_t va) +static inline pte_t *get_pte_virt_late(phys_addr_t pa) +{ + return (pte_t *) __va(pa); +} + +static inline phys_addr_t __init alloc_pte_early(uintptr_t va) { /* * We only create PMD or PGD early mappings so we * should never reach here with MMU disabled. */ - BUG_ON(!mmu_enabled); + BUG(); +} +static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va) +{ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); } +static phys_addr_t alloc_pte_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr))) + BUG(); + return __pa(vaddr); +} + static void __init create_pte_mapping(pte_t *ptep, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot) @@ -278,28 +302,46 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); -static pmd_t *__init get_pmd_virt(phys_addr_t pa) +static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { - if (mmu_enabled) { - clear_fixmap(FIX_PMD); - return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); - } else { - return (pmd_t *)((uintptr_t)pa); - } + /* Before MMU is enabled */ + return (pmd_t *)((uintptr_t)pa); } -static phys_addr_t __init alloc_pmd(uintptr_t va) +static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa) { - uintptr_t pmd_num; + clear_fixmap(FIX_PMD); + return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); +} + +static pmd_t *get_pmd_virt_late(phys_addr_t pa) +{ + return (pmd_t *) __va(pa); +} - if (mmu_enabled) - return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +static phys_addr_t __init alloc_pmd_early(uintptr_t va) +{ + uintptr_t pmd_num; pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT; BUG_ON(pmd_num >= NUM_EARLY_PMDS); return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD]; } +static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_pmd_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + static void __init create_pmd_mapping(pmd_t *pmdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot) @@ -315,34 +357,34 @@ static void __init create_pmd_mapping(pmd_t *pmdp, } if (pmd_none(pmdp[pmd_idx])) { - pte_phys = alloc_pte(va); + pte_phys = pt_ops.alloc_pte(va); pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE); - ptep = get_pte_virt(pte_phys); + ptep = pt_ops.get_pte_virt(pte_phys); memset(ptep, 0, PAGE_SIZE); } else { pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx])); - ptep = get_pte_virt(pte_phys); + ptep = pt_ops.get_pte_virt(pte_phys); } create_pte_mapping(ptep, va, pa, sz, prot); } #define pgd_next_t pmd_t -#define alloc_pgd_next(__va) alloc_pmd(__va) -#define get_pgd_next_virt(__pa) get_pmd_virt(__pa) +#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va) +#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next fixmap_pmd #else #define pgd_next_t pte_t -#define alloc_pgd_next(__va) alloc_pte(__va) -#define get_pgd_next_virt(__pa) get_pte_virt(__pa) +#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) +#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next fixmap_pte #endif -static void __init create_pgd_mapping(pgd_t *pgdp, +void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot) { @@ -398,10 +440,13 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t va, end_va; + uintptr_t va, pa, end_va; uintptr_t load_pa = (uintptr_t)(&_start); uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_t fix_bmap_spmd, fix_bmap_epmd; +#endif va_pa_offset = PAGE_OFFSET - load_pa; pfn_base = PFN_DOWN(load_pa); @@ -417,6 +462,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((load_pa % map_size) != 0); BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE); + pt_ops.alloc_pte = alloc_pte_early; + pt_ops.get_pte_virt = get_pte_virt_early; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_early; + pt_ops.get_pmd_virt = get_pmd_virt_early; +#endif /* Setup early PGD for fixmap */ create_pgd_mapping(early_pg_dir, FIXADDR_START, (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); @@ -447,42 +498,71 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) load_pa + (va - PAGE_OFFSET), map_size, PAGE_KERNEL_EXEC); - /* Create fixed mapping for early FDT parsing */ - end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE; - for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE) - create_pte_mapping(fixmap_pte, va, - dtb_pa + (va - __fix_to_virt(FIX_FDT)), - PAGE_SIZE, PAGE_KERNEL); - - /* Save pointer to DTB for early FDT parsing */ - dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); - /* Save physical address for memblock reservation */ + /* Create two consecutive PGD mappings for FDT early scan */ + pa = dtb_pa & ~(PGDIR_SIZE - 1); + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + pa, PGDIR_SIZE, PAGE_KERNEL); + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, + pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); + dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); dtb_early_pa = dtb_pa; + + /* + * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap + * range can not span multiple pmds. + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + +#ifndef __PAGETABLE_PMD_FOLDED + /* + * Early ioremap fixmap is already created as it lies within first 2MB + * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END + * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn + * the user if not. + */ + fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))]; + fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))]; + if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) { + WARN_ON(1); + pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n", + pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd)); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +#endif } static void __init setup_vm_final(void) { uintptr_t va, map_size; phys_addr_t pa, start, end; - struct memblock_region *reg; - - /* Set mmu_enabled flag */ - mmu_enabled = true; + u64 i; + /** + * MMU is enabled at this point. But page table setup is not complete yet. + * fixmap page table alloc functions should be used at this point + */ + pt_ops.alloc_pte = alloc_pte_fixmap; + pt_ops.get_pte_virt = get_pte_virt_fixmap; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_fixmap; + pt_ops.get_pmd_virt = get_pmd_virt_fixmap; +#endif /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, __pa_symbol(fixmap_pgd_next), PGDIR_SIZE, PAGE_TABLE); /* Map all memory banks */ - for_each_memblock(memory, reg) { - start = reg->base; - end = start + reg->size; - + for_each_mem_range(i, &start, &end) { if (start >= end) break; - if (memblock_is_nomap(reg)) - continue; if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); @@ -502,6 +582,14 @@ static void __init setup_vm_final(void) /* Move to swapper page table */ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); local_flush_tlb_all(); + + /* generic page allocation functions must be used to setup page table */ + pt_ops.alloc_pte = alloc_pte_late; + pt_ops.get_pte_virt = get_pte_virt_late; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_late; + pt_ops.get_pmd_virt = get_pmd_virt_late; +#endif } #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) @@ -524,48 +612,33 @@ static inline void setup_vm_final(void) #endif /* CONFIG_MMU */ #ifdef CONFIG_STRICT_KERNEL_RWX -void mark_rodata_ro(void) +void protect_kernel_text_data(void) { - unsigned long text_start = (unsigned long)_text; - unsigned long text_end = (unsigned long)_etext; + unsigned long text_start = (unsigned long)_start; + unsigned long init_text_start = (unsigned long)__init_text_begin; + unsigned long init_data_start = (unsigned long)__init_data_begin; unsigned long rodata_start = (unsigned long)__start_rodata; unsigned long data_start = (unsigned long)_data; unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); - set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT); - set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); + set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT); + set_memory_ro(init_text_start, (init_data_start - init_text_start) >> PAGE_SHIFT); + set_memory_nx(init_data_start, (rodata_start - init_data_start) >> PAGE_SHIFT); + /* rodata section is marked readonly in mark_rodata_ro */ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT); - - debug_checkwx(); } -#endif -static void __init resource_init(void) +void mark_rodata_ro(void) { - struct memblock_region *region; - - for_each_memblock(memory, region) { - struct resource *res; - - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + unsigned long rodata_start = (unsigned long)__start_rodata; + unsigned long data_start = (unsigned long)_data; - if (memblock_is_nomap(region)) { - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - } else { - res->name = "System RAM"; - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - } - res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); - res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); - request_resource(&iomem_resource, res); - } + debug_checkwx(); } +#endif void __init paging_init(void) { @@ -573,7 +646,6 @@ void __init paging_init(void) sparse_init(); setup_zero_page(); zone_sizes_init(); - resource_init(); } #ifdef CONFIG_SPARSEMEM_VMEMMAP diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 87b4ab3d3c77..12ddd1f6bf70 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -85,16 +85,16 @@ static void __init populate(void *start, void *end) void __init kasan_init(void) { - struct memblock_region *reg; - unsigned long i; + phys_addr_t _start, _end; + u64 i; kasan_populate_early_shadow((void *)KASAN_SHADOW_START, (void *)kasan_mem_to_shadow((void *) VMALLOC_END)); - for_each_memblock(memory, reg) { - void *start = (void *)__va(reg->base); - void *end = (void *)__va(reg->base + reg->size); + for_each_mem_range(i, &_start, &_end) { + void *start = (void *)_start; + void *end = (void *)_end; if (start >= end) break; diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 19fecb362d81..c64ec224fd0c 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, return ret; } +int set_memory_rw_nx(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE), + __pgprot(_PAGE_EXEC)); +} + int set_memory_ro(unsigned long addr, int numpages) { return __set_memory(addr, numpages, __pgprot(_PAGE_READ), diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 0831c2e61a8f..ace74dec7492 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -3,6 +3,7 @@ * Copyright (C) 2019 SiFive */ +#include <linux/efi.h> #include <linux/init.h> #include <linux/debugfs.h> #include <linux/seq_file.h> @@ -49,6 +50,14 @@ struct addr_marker { const char *name; }; +/* Private information for debugfs */ +struct ptd_mm_info { + struct mm_struct *mm; + const struct addr_marker *markers; + unsigned long base_addr; + unsigned long end; +}; + static struct addr_marker address_markers[] = { #ifdef CONFIG_KASAN {KASAN_SHADOW_START, "Kasan shadow start"}, @@ -68,6 +77,28 @@ static struct addr_marker address_markers[] = { {-1, NULL}, }; +static struct ptd_mm_info kernel_ptd_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = KERN_VIRT_START, + .end = ULONG_MAX, +}; + +#ifdef CONFIG_EFI +static struct addr_marker efi_addr_markers[] = { + { 0, "UEFI runtime start" }, + { SZ_1G, "UEFI runtime end" }, + { -1, NULL } +}; + +static struct ptd_mm_info efi_ptd_info = { + .mm = &efi_mm, + .markers = efi_addr_markers, + .base_addr = 0, + .end = SZ_2G, +}; +#endif + /* Page Table Entry */ struct prot_bits { u64 mask; @@ -245,22 +276,22 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, } } -static void ptdump_walk(struct seq_file *s) +static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo) { struct pg_state st = { .seq = s, - .marker = address_markers, + .marker = pinfo->markers, .level = -1, .ptdump = { .note_page = note_page, .range = (struct ptdump_range[]) { - {KERN_VIRT_START, ULONG_MAX}, + {pinfo->base_addr, pinfo->end}, {0, 0} } } }; - ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL); } void ptdump_check_wx(void) @@ -293,7 +324,7 @@ void ptdump_check_wx(void) static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk(m); + ptdump_walk(m, m->private); return 0; } @@ -308,8 +339,13 @@ static int ptdump_init(void) for (j = 0; j < ARRAY_SIZE(pte_bits); j++) pg_level[i].mask |= pte_bits[j].mask; - debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info, &ptdump_fops); +#ifdef CONFIG_EFI + if (efi_enabled(EFI_RUNTIME_SERVICES)) + debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info, + &ptdump_fops); +#endif return 0; } |