From 671f9a3e2e24cdeb2d2856abee7422f093e23e29 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 28 Jun 2019 13:36:21 -0700 Subject: RISC-V: Setup initial page tables in two stages Currently, the setup_vm() does initial page table setup in one-shot very early before enabling MMU. Due to this, the setup_vm() has to map all possible kernel virtual addresses since it does not know size and location of RAM. This means we have kernel mappings for non-existent RAM and any buggy driver (or kernel) code doing out-of-bound access to RAM will not fault and cause underterministic behaviour. Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e. MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e. 516 KB) of memory for initial page tables which is never freed. The memory required for initial page tables will further increase if we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000) This patch implements two-staged initial page table setup, as follows: 1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in a early page table (i.e. early_pg_dir). The early_pg_dir will be used only by boot HART so it can be freed as-part of init memory free-up. 2. Final (i.e. setup_vm_final()): This stage maps all possible RAM banks in the final page table (i.e. swapper_pg_dir). The boot HART will start using swapper_pg_dir at the end of setup_vm_final(). All non-boot HARTs directly use the swapper_pg_dir created by boot HART. We have following advantages with this new approach: 1. Kernel mappings for non-existent RAM don't exists anymore. 2. Memory consumed by initial page tables is now indpendent of the chosen PAGE_OFFSET. 3. Memory consumed by initial page tables on RV64 system is 2 pages (i.e. 8 KB) which has significantly reduced and these pages will be freed as-part of the init memory free-up. The patch also provides a foundation for implementing strict kernel mappings where we protect kernel text and rodata using PTE permissions. Suggested-by: Mike Rapoport Signed-off-by: Anup Patel [paul.walmsley@sifive.com: updated to apply; fixed a checkpatch warning] Signed-off-by: Paul Walmsley --- arch/riscv/mm/init.c | 307 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 255 insertions(+), 52 deletions(-) (limited to 'arch/riscv/mm') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b1ca38642251..42bf939693d3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2019 Western Digital Corporation or its affiliates. */ #include @@ -41,13 +42,6 @@ void setup_zero_page(void) memset((void *)empty_zero_page, 0, PAGE_SIZE); } -void __init paging_init(void) -{ - setup_zero_page(); - local_flush_tlb_all(); - zone_sizes_init(); -} - void __init mem_init(void) { #ifdef CONFIG_FLATMEM @@ -143,17 +137,15 @@ EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; EXPORT_SYMBOL(pfn_base); +void *dtb_early_va; pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; -pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; +pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; +static bool mmu_enabled; -#ifndef __PAGETABLE_PMD_FOLDED -#define NUM_SWAPPER_PMDS ((uintptr_t)-PAGE_OFFSET >> PGDIR_SHIFT) -pmd_t swapper_pmd[PTRS_PER_PMD*((-PAGE_OFFSET)/PGDIR_SIZE)] __page_aligned_bss; -pmd_t trampoline_pmd[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); -pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif +#define MAX_EARLY_MAPPING_SIZE SZ_128M -pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; +pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { @@ -172,6 +164,156 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) } } +static pte_t *__init get_pte_virt(phys_addr_t pa) +{ + if (mmu_enabled) { + clear_fixmap(FIX_PTE); + return (pte_t *)set_fixmap_offset(FIX_PTE, pa); + } else { + return (pte_t *)((uintptr_t)pa); + } +} + +static phys_addr_t __init alloc_pte(uintptr_t va) +{ + /* + * We only create PMD or PGD early mappings so we + * should never reach here with MMU disabled. + */ + BUG_ON(!mmu_enabled); + + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static void __init create_pte_mapping(pte_t *ptep, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + uintptr_t pte_index = pte_index(va); + + BUG_ON(sz != PAGE_SIZE); + + if (pte_none(ptep[pte_index])) + ptep[pte_index] = pfn_pte(PFN_DOWN(pa), prot); +} + +#ifndef __PAGETABLE_PMD_FOLDED + +pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; +pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; + +#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE +#define NUM_EARLY_PMDS 1UL +#else +#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) +#endif +pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); + +static pmd_t *__init get_pmd_virt(phys_addr_t pa) +{ + if (mmu_enabled) { + clear_fixmap(FIX_PMD); + return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); + } else { + return (pmd_t *)((uintptr_t)pa); + } +} + +static phys_addr_t __init alloc_pmd(uintptr_t va) +{ + uintptr_t pmd_num; + + if (mmu_enabled) + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + + pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT; + BUG_ON(pmd_num >= NUM_EARLY_PMDS); + return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD]; +} + +static void __init create_pmd_mapping(pmd_t *pmdp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pte_t *ptep; + phys_addr_t pte_phys; + uintptr_t pmd_index = pmd_index(va); + + if (sz == PMD_SIZE) { + if (pmd_none(pmdp[pmd_index])) + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pa), prot); + return; + } + + if (pmd_none(pmdp[pmd_index])) { + pte_phys = alloc_pte(va); + pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE); + ptep = get_pte_virt(pte_phys); + memset(ptep, 0, PAGE_SIZE); + } else { + pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_index])); + ptep = get_pte_virt(pte_phys); + } + + create_pte_mapping(ptep, va, pa, sz, prot); +} + +#define pgd_next_t pmd_t +#define alloc_pgd_next(__va) alloc_pmd(__va) +#define get_pgd_next_virt(__pa) get_pmd_virt(__pa) +#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) +#define PTE_PARENT_SIZE PMD_SIZE +#define fixmap_pgd_next fixmap_pmd +#else +#define pgd_next_t pte_t +#define alloc_pgd_next(__va) alloc_pte(__va) +#define get_pgd_next_virt(__pa) get_pte_virt(__pa) +#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + create_pte_mapping(__nextp, __va, __pa, __sz, __prot) +#define PTE_PARENT_SIZE PGDIR_SIZE +#define fixmap_pgd_next fixmap_pte +#endif + +static void __init create_pgd_mapping(pgd_t *pgdp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pgd_next_t *nextp; + phys_addr_t next_phys; + uintptr_t pgd_index = pgd_index(va); + + if (sz == PGDIR_SIZE) { + if (pgd_val(pgdp[pgd_index]) == 0) + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot); + return; + } + + if (pgd_val(pgdp[pgd_index]) == 0) { + next_phys = alloc_pgd_next(va); + pgdp[pgd_index] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = get_pgd_next_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index])); + nextp = get_pgd_next_virt(next_phys); + } + + create_pgd_next_mapping(nextp, va, pa, sz, prot); +} + +static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) +{ + uintptr_t map_size = PAGE_SIZE; + + /* Upgrade to PMD/PGDIR mappings whenever possible */ + if (!(base & (PTE_PARENT_SIZE - 1)) && + !(size & (PTE_PARENT_SIZE - 1))) + map_size = PTE_PARENT_SIZE; + + return map_size; +} + /* * setup_vm() is called from head.S with MMU-off. * @@ -191,54 +333,115 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) "not use absolute addressing." #endif -asmlinkage void __init setup_vm(void) +asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t i; - uintptr_t pa = (uintptr_t) &_start; - pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_EXEC); + uintptr_t va, end_va; + uintptr_t load_pa = (uintptr_t)(&_start); + uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; + uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); + + va_pa_offset = PAGE_OFFSET - load_pa; + pfn_base = PFN_DOWN(load_pa); - va_pa_offset = PAGE_OFFSET - pa; - pfn_base = PFN_DOWN(pa); + /* + * Enforce boot alignment requirements of RV32 and + * RV64 by only allowing PMD or PGD mappings. + */ + BUG_ON(map_size == PAGE_SIZE); /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); - BUG_ON((pa % (PAGE_SIZE * PTRS_PER_PTE)) != 0); + BUG_ON((load_pa % map_size) != 0); + BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE); + + /* Setup early PGD for fixmap */ + create_pgd_mapping(early_pg_dir, FIXADDR_START, + (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED - trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)trampoline_pmd), - __pgprot(_PAGE_TABLE)); - trampoline_pmd[0] = pfn_pmd(PFN_DOWN(pa), prot); + /* Setup fixmap PMD */ + create_pmd_mapping(fixmap_pmd, FIXADDR_START, + (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); + /* Setup trampoline PGD and PMD */ + create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); + create_pmd_mapping(trampoline_pmd, PAGE_OFFSET, + load_pa, PMD_SIZE, PAGE_KERNEL_EXEC); +#else + /* Setup trampoline PGD */ + create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC); +#endif - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) { - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i; + /* + * Setup early PGD covering entire kernel which will allows + * us to reach paging_init(). We map all memory banks later + * in setup_vm_final() below. + */ + end_va = PAGE_OFFSET + load_sz; + for (va = PAGE_OFFSET; va < end_va; va += map_size) + create_pgd_mapping(early_pg_dir, va, + load_pa + (va - PAGE_OFFSET), + map_size, PAGE_KERNEL_EXEC); + + /* Create fixed mapping for early FDT parsing */ + end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE; + for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE) + create_pte_mapping(fixmap_pte, va, + dtb_pa + (va - __fix_to_virt(FIX_FDT)), + PAGE_SIZE, PAGE_KERNEL); + + /* Save pointer to DTB for early FDT parsing */ + dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK); +} - swapper_pg_dir[o] = - pfn_pgd(PFN_DOWN((uintptr_t)swapper_pmd) + i, - __pgprot(_PAGE_TABLE)); - } - for (i = 0; i < ARRAY_SIZE(swapper_pmd); i++) - swapper_pmd[i] = pfn_pmd(PFN_DOWN(pa + i * PMD_SIZE), prot); - - swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pmd), - __pgprot(_PAGE_TABLE)); - fixmap_pmd[(FIXADDR_START >> PMD_SHIFT) % PTRS_PER_PMD] = - pfn_pmd(PFN_DOWN((uintptr_t)fixmap_pte), - __pgprot(_PAGE_TABLE)); -#else - trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN(pa), prot); +static void __init setup_vm_final(void) +{ + uintptr_t va, map_size; + phys_addr_t pa, start, end; + struct memblock_region *reg; + + /* Set mmu_enabled flag */ + mmu_enabled = true; - for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) { - size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i; + /* Setup swapper PGD for fixmap */ + create_pgd_mapping(swapper_pg_dir, FIXADDR_START, + __pa(fixmap_pgd_next), + PGDIR_SIZE, PAGE_TABLE); - swapper_pg_dir[o] = - pfn_pgd(PFN_DOWN(pa + i * PGDIR_SIZE), prot); + /* Map all memory banks */ + for_each_memblock(memory, reg) { + start = reg->base; + end = start + reg->size; + + if (start >= end) + break; + if (memblock_is_nomap(reg)) + continue; + if (start <= __pa(PAGE_OFFSET) && + __pa(PAGE_OFFSET) < end) + start = __pa(PAGE_OFFSET); + + map_size = best_map_size(start, end - start); + for (pa = start; pa < end; pa += map_size) { + va = (uintptr_t)__va(pa); + create_pgd_mapping(swapper_pg_dir, va, pa, + map_size, PAGE_KERNEL_EXEC); + } } - swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] = - pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pte), - __pgprot(_PAGE_TABLE)); -#endif + /* Clear fixmap PTE and PMD mappings */ + clear_fixmap(FIX_PTE); + clear_fixmap(FIX_PMD); + + /* Move to swapper page table */ + csr_write(sptbr, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE); + local_flush_tlb_all(); +} + +void __init paging_init(void) +{ + setup_vm_final(); + setup_zero_page(); + zone_sizes_init(); } -- cgit v1.2.3