summaryrefslogtreecommitdiff
path: root/arch/s390/mm/vmem.c
diff options
context:
space:
mode:
authorAlexander Gordeev <agordeev@linux.ibm.com>2022-12-13 13:35:11 +0300
committerHeiko Carstens <hca@linux.ibm.com>2023-01-13 16:15:05 +0300
commitbb1520d581a3a46e2d6e12bb74604ace33404de5 (patch)
treee30707db7e1375a9b4bb8bd40102e57aee8c968a /arch/s390/mm/vmem.c
parentbd50b7436217b4123911c2bca1efd74718654f06 (diff)
downloadlinux-bb1520d581a3a46e2d6e12bb74604ace33404de5.tar.xz
s390/mm: start kernel with DAT enabled
The setup of the kernel virtual address space is spread throughout the sources, boot stages and config options like this: 1. The available physical memory regions are queried and stored as mem_detect information for later use in the decompressor. 2. Based on the physical memory availability the virtual memory layout is established in the decompressor. 3. If CONFIG_KASAN is disabled the kernel paging setup code populates kernel pgtables and turns DAT mode on. It uses the information stored at step [1]. 4. If CONFIG_KASAN is enabled the kernel early boot kasan setup populates kernel pgtables and turns DAT mode on. It uses the information stored at step [1]. The kasan setup creates early_pg_dir directory and directly overwrites swapper_pg_dir entries to make shadow memory pages available. Move the kernel virtual memory setup to the decompressor and start the kernel with DAT turned on right from the very first instruction. That completely eliminates the boot phase when the kernel runs in DAT-off mode, simplifies the overall design and consolidates pgtables setup. The identity mapping is created in the decompressor, while kasan shadow mappings are still created by the early boot kernel code. Share the existing kasan memory allocator with the decompressor. It decreases the size of a newly requested memory block from pgalloc_pos and ensures that kernel image is not overwritten. pgalloc_low and pgalloc_pos pointers are made preserved boot variables for that. Use the bootdata infrastructure to set up swapper_pg_dir and invalid_pg_dir directories used by the kernel later. The interim early_pg_dir directory established by the kasan initialization code gets eliminated as a result. As the kernel runs in DAT-on mode only, the PSW_KERNEL_BITS define gets PSW_MASK_DAT bit by default. Additionally, the setup_lowcore_dat_off() and setup_lowcore_dat_on() routines get merged, since there is no DAT-off mode stage anymore. 
The memory mappings are created with RW+X protection, which allows the early boot code to set up all necessary data and services for the kernel being booted. Just before the paging is enabled the memory protection is changed to RO+X for text, RO+NX for read-only data and RW+NX for kernel data and the identity mapping. Reviewed-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/mm/vmem.c')
-rw-r--r--arch/s390/mm/vmem.c96
1 files changed, 88 insertions, 8 deletions
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ee1a97078527..78d7768f93d7 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
@@ -657,6 +658,29 @@ void vmem_unmap_4k_page(unsigned long addr)
mutex_unlock(&vmem_mutex);
}
+static int __init memblock_region_cmp(const void *a, const void *b)
+{
+ const struct memblock_region *r1 = a;
+ const struct memblock_region *r2 = b;
+
+ if (r1->base < r2->base)
+ return -1;
+ if (r1->base > r2->base)
+ return 1;
+ return 0;
+}
+
+static void __init memblock_region_swap(void *a, void *b, int size)
+{
+ struct memblock_region *r1 = a;
+ struct memblock_region *r2 = b;
+ struct memblock_region swap;
+
+ swap = *r1;
+ *r1 = *r2;
+ *r2 = swap;
+}
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -664,11 +688,68 @@ void vmem_unmap_4k_page(unsigned long addr)
*/
void __init vmem_map_init(void)
{
+ struct memblock_region memory_rwx_regions[] = {
+ {
+ .base = 0,
+ .size = sizeof(struct lowcore),
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_stext),
+ .size = _etext - _stext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_sinittext),
+ .size = _einittext - _sinittext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __stext_amode31,
+ .size = __etext_amode31 - __stext_amode31,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ };
+ struct memblock_type memory_rwx = {
+ .regions = memory_rwx_regions,
+ .cnt = ARRAY_SIZE(memory_rwx_regions),
+ .max = ARRAY_SIZE(memory_rwx_regions),
+ };
phys_addr_t base, end;
u64 i;
- for_each_mem_range(i, &base, &end)
- vmem_add_range(base, end - base);
+ /*
+ * Set RW+NX attribute on all memory, except regions enumerated with
+ * memory_rwx exclude type. These regions need different attributes,
+ * which are enforced afterwards.
+ *
+ * __for_each_mem_range() iterate and exclude types should be sorted.
+ * The relative location of _stext and _sinittext is hardcoded in the
+ * linker script. However a location of __stext_amode31 and the kernel
+ * image itself are chosen dynamically. Thus, sort the exclude type.
+ */
+ sort(&memory_rwx_regions,
+ ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
+ memblock_region_cmp, memblock_region_swap);
+ __for_each_mem_range(i, &memblock.memory, &memory_rwx,
+ NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
+ __set_memory((unsigned long)__va(base),
+ (end - base) >> PAGE_SHIFT,
+ SET_MEMORY_RW | SET_MEMORY_NX);
+ }
+
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
@@ -678,15 +759,14 @@ void __init vmem_map_init(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
+ __set_memory(__stext_amode31,
+ (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- /* lowcore requires 4k mapping for real addresses / prefixing */
- set_memory_4k(0, LC_PAGES);
-
/* lowcore must be executable for LPSWE */
- if (!static_key_enabled(&cpu_has_bear))
- set_memory_x(0, 1);
+ if (static_key_enabled(&cpu_has_bear))
+ set_memory_nx(0, 1);
+ set_memory_nx(PAGE_SIZE, 1);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);