summaryrefslogtreecommitdiff
path: root/arch/s390/boot
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/boot')
-rw-r--r--arch/s390/boot/Makefile2
-rw-r--r--arch/s390/boot/boot.h40
-rw-r--r--arch/s390/boot/decompressor.c1
-rw-r--r--arch/s390/boot/decompressor.h26
-rw-r--r--arch/s390/boot/kaslr.c16
-rw-r--r--arch/s390/boot/mem_detect.c72
-rw-r--r--arch/s390/boot/startup.c86
-rw-r--r--arch/s390/boot/vmem.c278
8 files changed, 428 insertions, 93 deletions
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index d52c3e2e16bc..47a397da0498 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -35,7 +35,7 @@ endif
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
+obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 70418389414d..ed85b144119a 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -8,10 +8,36 @@
#ifndef __ASSEMBLY__
+/*
+ * Machine facility flags used by the decompressor. The bits are set by
+ * detect_facilities() in startup.c and read by the page table setup code
+ * in vmem.c.
+ */
+struct machine_info {
+ unsigned char has_edat1 : 1; /* set when test_facility(8) is true */
+ unsigned char has_edat2 : 1; /* set when test_facility(78) is true */
+ unsigned char has_nx : 1; /* set when test_facility(130) is true and noexec is not disabled */
+};
+
+/*
+ * Description of the uncompressed kernel image. The single instance is
+ * declared as _vmlinux_info and accessed through the vmlinux macro.
+ */
+struct vmlinux_info {
+ unsigned long default_lma;
+ unsigned long entry; /* loaded into psw.addr by startup_kernel() */
+ unsigned long image_size; /* does not include .bss */
+ unsigned long bss_size; /* uncompressed image .bss size */
+ unsigned long bootdata_off;
+ unsigned long bootdata_size;
+ unsigned long bootdata_preserved_off;
+ unsigned long bootdata_preserved_size;
+ unsigned long dynsym_start;
+ unsigned long rela_dyn_start;
+ unsigned long rela_dyn_end;
+ unsigned long amode31_size;
+ /*
+  * The three fields below are adjusted by offset_vmlinux_info() and
+  * are used by vmem.c as the addresses of init_mm, swapper_pg_dir and
+  * invalid_pg_dir.
+  */
+ unsigned long init_mm_off;
+ unsigned long swapper_pg_dir_off;
+ unsigned long invalid_pg_dir_off;
+};
+
void startup_kernel(void);
-unsigned long detect_memory(void);
+unsigned long detect_memory(unsigned long *safe_addr);
+void mem_detect_truncate(unsigned long limit);
bool is_ipl_block_dump(void);
void store_ipl_parmblock(void);
+unsigned long read_ipl_report(unsigned long safe_addr);
void setup_boot_command_line(void);
void parse_boot_command_line(void);
void verify_facilities(void);
@@ -19,7 +45,12 @@ void print_missing_facilities(void);
void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
unsigned long get_random_base(unsigned long safe_addr);
+void setup_vmem(unsigned long asce_limit);
+unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void error(char *m);
+
+extern struct machine_info machine;
/* Symbols defined by linker scripts */
extern const char kernel_version[];
@@ -31,8 +62,13 @@ extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
extern char _decompressor_syms_start[], _decompressor_syms_end[];
extern char _stack_start[], _stack_end[];
+extern char _end[];
+extern unsigned char _compressed_start[];
+extern unsigned char _compressed_end[];
+extern struct vmlinux_info _vmlinux_info;
+#define vmlinux _vmlinux_info
-unsigned long read_ipl_report(unsigned long safe_offset);
+#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
#endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index b519a1f045d8..d762733a0753 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <asm/page.h>
#include "decompressor.h"
+#include "boot.h"
/*
* gzip declarations
diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h
index f75cc31a77dd..92b81d2ea35d 100644
--- a/arch/s390/boot/decompressor.h
+++ b/arch/s390/boot/decompressor.h
@@ -2,37 +2,11 @@
#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
#define BOOT_COMPRESSED_DECOMPRESSOR_H
-#include <linux/stddef.h>
-
#ifdef CONFIG_KERNEL_UNCOMPRESSED
static inline void *decompress_kernel(void) { return NULL; }
#else
void *decompress_kernel(void);
#endif
unsigned long mem_safe_offset(void);
-void error(char *m);
-
-struct vmlinux_info {
- unsigned long default_lma;
- void (*entry)(void);
- unsigned long image_size; /* does not include .bss */
- unsigned long bss_size; /* uncompressed image .bss size */
- unsigned long bootdata_off;
- unsigned long bootdata_size;
- unsigned long bootdata_preserved_off;
- unsigned long bootdata_preserved_size;
- unsigned long dynsym_start;
- unsigned long rela_dyn_start;
- unsigned long rela_dyn_end;
- unsigned long amode31_size;
-};
-
-/* Symbols defined by linker scripts */
-extern char _end[];
-extern unsigned char _compressed_start[];
-extern unsigned char _compressed_end[];
-extern char _vmlinux_info[];
-
-#define vmlinux (*(struct vmlinux_info *)_vmlinux_info)
#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index e8d74d4f62aa..70ff68dd1fee 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -172,26 +172,20 @@ static unsigned long position_to_address(unsigned long pos, unsigned long kernel
unsigned long get_random_base(unsigned long safe_addr)
{
+ unsigned long online_mem_total = get_mem_detect_online_total();
unsigned long memory_limit = get_mem_detect_end();
unsigned long base_pos, max_pos, kernel_size;
- unsigned long kasan_needs;
int i;
- memory_limit = min(memory_limit, ident_map_size);
-
/*
* Avoid putting kernel in the end of physical memory
- * which kasan will use for shadow memory and early pgtable
- * mapping allocations.
+ * which vmem and kasan code will use for shadow memory and
+ * pgtable mapping allocations.
*/
- memory_limit -= kasan_estimate_memory_needs(memory_limit);
+ memory_limit -= kasan_estimate_memory_needs(online_mem_total);
+ memory_limit -= vmem_estimate_memory_needs(online_mem_total);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size) {
- if (safe_addr < initrd_data.start + initrd_data.size)
- safe_addr = initrd_data.start + initrd_data.size;
- }
safe_addr = ALIGN(safe_addr, THREAD_SIZE);
-
kernel_size = vmlinux.image_size + vmlinux.bss_size;
if (safe_addr + kernel_size > memory_limit)
return 0;
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 7fa1a32ea0f3..3058d397a9da 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -16,29 +16,10 @@ struct mem_detect_info __bootdata(mem_detect);
#define ENTRIES_EXTENDED_MAX \
(256 * (1020 / 2) * sizeof(struct mem_detect_block))
-/*
- * To avoid corrupting old kernel memory during dump, find lowest memory
- * chunk possible either right after the kernel end (decompressed kernel) or
- * after initrd (if it is present and there is no hole between the kernel end
- * and initrd)
- */
-static void *mem_detect_alloc_extended(void)
-{
- unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
-
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
- initrd_data.start < offset + ENTRIES_EXTENDED_MAX)
- offset = ALIGN(initrd_data.start + initrd_data.size, sizeof(u64));
-
- return (void *)offset;
-}
-
static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n)
{
if (n < MEM_INLINED_ENTRIES)
return &mem_detect.entries[n];
- if (unlikely(!mem_detect.entries_extended))
- mem_detect.entries_extended = mem_detect_alloc_extended();
return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES];
}
@@ -147,7 +128,7 @@ static int tprot(unsigned long addr)
return rc;
}
-static void search_mem_end(void)
+static unsigned long search_mem_end(void)
{
unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */
unsigned long offset = 0;
@@ -159,33 +140,52 @@ static void search_mem_end(void)
if (!tprot(pivot << 20))
offset = pivot;
}
-
- add_mem_detect_block(0, (offset + 1) << 20);
+ return (offset + 1) << 20;
}
-unsigned long detect_memory(void)
+unsigned long detect_memory(unsigned long *safe_addr)
{
- unsigned long max_physmem_end;
+ unsigned long max_physmem_end = 0;
sclp_early_get_memsize(&max_physmem_end);
+ mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64));
if (!sclp_early_read_storage_info()) {
mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO;
- return max_physmem_end;
- }
-
- if (!diag260()) {
+ } else if (!diag260()) {
mem_detect.info_source = MEM_DETECT_DIAG260;
- return max_physmem_end;
- }
-
- if (max_physmem_end) {
+ max_physmem_end = max_physmem_end ?: get_mem_detect_end();
+ } else if (max_physmem_end) {
add_mem_detect_block(0, max_physmem_end);
mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO;
- return max_physmem_end;
+ } else {
+ max_physmem_end = search_mem_end();
+ add_mem_detect_block(0, max_physmem_end);
+ mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
}
- search_mem_end();
- mem_detect.info_source = MEM_DETECT_BIN_SEARCH;
- return get_mem_detect_end();
+ if (mem_detect.count > MEM_INLINED_ENTRIES) {
+ *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) *
+ sizeof(struct mem_detect_block);
+ }
+
+ return max_physmem_end;
+}
+
+/*
+ * Cut the detected memory list off at @limit.
+ *
+ * The first block that starts at or above @limit is dropped together with
+ * every block after it. A block that merely reaches past @limit has its end
+ * clamped to @limit and becomes the last block. If no block crosses @limit
+ * the list is left unchanged.
+ */
+void mem_detect_truncate(unsigned long limit)
+{
+	struct mem_detect_block *blk;
+	int idx;
+
+	for (idx = 0; idx < mem_detect.count; idx++) {
+		blk = __get_mem_detect_block_ptr(idx);
+
+		if (blk->start >= limit) {
+			/* Block lies entirely at or above the limit. */
+			mem_detect.count = idx;
+			return;
+		}
+		if (blk->end > limit) {
+			/* Block straddles the limit: keep its lower part. */
+			blk->end = (u64)limit;
+			mem_detect.count = idx + 1;
+			return;
+		}
+	}
+}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 47ca3264c023..f5a7545d3c13 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -3,6 +3,7 @@
#include <linux/elf.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
+#include <asm/maccess.h>
#include <asm/cpu_mf.h>
#include <asm/setup.h>
#include <asm/kasan.h>
@@ -11,6 +12,7 @@
#include <asm/diag.h>
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
+#include <asm/mem_detect.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
@@ -18,6 +20,7 @@
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata_preserved(__memcpy_real_area);
+pte_t *__bootdata_preserved(memcpy_real_ptep);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
@@ -33,6 +36,8 @@ u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
+struct machine_info machine;
+
void error(char *x)
{
sclp_early_printk("\n\n");
@@ -42,6 +47,20 @@ void error(char *x)
disabled_wait();
}
+/*
+ * Probe facilities with test_facility() and record the results in the
+ * global machine flags. For facility 8 and facility 130 a bit is
+ * additionally set through __ctl_set_bit() (arguments 0/23 and 0/20).
+ * NOTE(review): presumably these are bits of control register 0 that
+ * enable the corresponding feature - confirm against __ctl_set_bit().
+ */
+static void detect_facilities(void)
+{
+ if (test_facility(8)) {
+ machine.has_edat1 = 1;
+ __ctl_set_bit(0, 23);
+ }
+ if (test_facility(78))
+ machine.has_edat2 = 1;
+ /* has_nx is only recorded when noexec has not been disabled */
+ if (!noexec_disabled && test_facility(130)) {
+ machine.has_nx = 1;
+ __ctl_set_bit(0, 20);
+ }
+}
+
static void setup_lpp(void)
{
S390_lowcore.current_pid = 0;
@@ -57,16 +76,17 @@ unsigned long mem_safe_offset(void)
}
#endif
-static void rescue_initrd(unsigned long addr)
+static unsigned long rescue_initrd(unsigned long safe_addr)
{
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
- return;
+ return safe_addr;
if (!initrd_data.start || !initrd_data.size)
- return;
- if (addr <= initrd_data.start)
- return;
- memmove((void *)addr, (void *)initrd_data.start, initrd_data.size);
- initrd_data.start = addr;
+ return safe_addr;
+ if (initrd_data.start < safe_addr) {
+ memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size);
+ initrd_data.start = safe_addr;
+ }
+ return initrd_data.start + initrd_data.size;
}
static void copy_bootdata(void)
@@ -150,9 +170,10 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
#endif
}
-static void setup_kernel_memory_layout(void)
+static unsigned long setup_kernel_memory_layout(void)
{
unsigned long vmemmap_start;
+ unsigned long asce_limit;
unsigned long rte_size;
unsigned long pages;
unsigned long vmax;
@@ -167,10 +188,10 @@ static void setup_kernel_memory_layout(void)
vmalloc_size > _REGION2_SIZE ||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
_REGION2_SIZE) {
- vmax = _REGION1_SIZE;
+ asce_limit = _REGION1_SIZE;
rte_size = _REGION2_SIZE;
} else {
- vmax = _REGION2_SIZE;
+ asce_limit = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
}
/*
@@ -178,7 +199,7 @@ static void setup_kernel_memory_layout(void)
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
*/
- vmax = adjust_to_uv_max(vmax);
+ vmax = adjust_to_uv_max(asce_limit);
#ifdef CONFIG_KASAN
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
@@ -207,6 +228,8 @@ static void setup_kernel_memory_layout(void)
/* make sure vmemmap doesn't overlay with vmalloc area */
VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
vmemmap = (struct page *)vmemmap_start;
+
+ return asce_limit;
}
/*
@@ -240,19 +263,25 @@ static void offset_vmlinux_info(unsigned long offset)
vmlinux.rela_dyn_start += offset;
vmlinux.rela_dyn_end += offset;
vmlinux.dynsym_start += offset;
+ vmlinux.init_mm_off += offset;
+ vmlinux.swapper_pg_dir_off += offset;
+ vmlinux.invalid_pg_dir_off += offset;
}
static unsigned long reserve_amode31(unsigned long safe_addr)
{
__amode31_base = PAGE_ALIGN(safe_addr);
- return safe_addr + vmlinux.amode31_size;
+ return __amode31_base + vmlinux.amode31_size;
}
void startup_kernel(void)
{
+ unsigned long max_physmem_end;
unsigned long random_lma;
unsigned long safe_addr;
+ unsigned long asce_limit;
void *img;
+ psw_t psw;
initrd_data.start = parmarea.initrd_start;
initrd_data.size = parmarea.initrd_size;
@@ -265,14 +294,17 @@ void startup_kernel(void)
safe_addr = reserve_amode31(safe_addr);
safe_addr = read_ipl_report(safe_addr);
uv_query_info();
- rescue_initrd(safe_addr);
+ safe_addr = rescue_initrd(safe_addr);
sclp_early_read_info();
setup_boot_command_line();
parse_boot_command_line();
+ detect_facilities();
sanitize_prot_virt_host();
- setup_ident_map_size(detect_memory());
+ max_physmem_end = detect_memory(&safe_addr);
+ setup_ident_map_size(max_physmem_end);
setup_vmalloc_size();
- setup_kernel_memory_layout();
+ asce_limit = setup_kernel_memory_layout();
+ mem_detect_truncate(ident_map_size);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
random_lma = get_random_base(safe_addr);
@@ -289,9 +321,23 @@ void startup_kernel(void)
} else if (__kaslr_offset)
memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+ /*
+ * The order of the following operations is important:
+ *
+ * - handle_relocs() must follow clear_bss_section() to establish static
+ * memory references to data in .bss to be used by setup_vmem()
+ * (i.e init_mm.pgd)
+ *
+ * - setup_vmem() must follow handle_relocs() to be able to use
+ * static memory references to data in .bss (i.e. init_mm.pgd)
+ *
+ * - copy_bootdata() must follow setup_vmem() to propagate changes to
+ * bootdata made by setup_vmem()
+ */
clear_bss_section();
- copy_bootdata();
handle_relocs(__kaslr_offset);
+ setup_vmem(asce_limit);
+ copy_bootdata();
if (__kaslr_offset) {
/*
@@ -303,5 +349,11 @@ void startup_kernel(void)
if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
memset(img, 0, vmlinux.image_size);
}
- vmlinux.entry();
+
+ /*
+ * Jump to the decompressed kernel entry point and switch DAT mode on.
+ */
+ psw.addr = vmlinux.entry;
+ psw.mask = PSW_KERNEL_BITS;
+ __load_psw(psw);
}
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
new file mode 100644
index 000000000000..4e54357ccd00
--- /dev/null
+++ b/arch/s390/boot/vmem.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched/task.h>
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/mem_detect.h>
+#include <asm/maccess.h>
+#include <asm/abs_lowcore.h>
+#include "decompressor.h"
+#include "boot.h"
+
+/*
+ * These kernel objects are reached through the addresses stored in
+ * vmlinux_info instead of by symbol. init_mm expands to an lvalue of type
+ * struct mm_struct; the two page directory macros expand to plain
+ * unsigned long addresses and need a cast before being dereferenced.
+ */
+#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
+#define swapper_pg_dir vmlinux.swapper_pg_dir_off
+#define invalid_pg_dir vmlinux.invalid_pg_dir_off
+
+/*
+ * Local stand-in for virt_to_kpte(), needed because the init_mm symbol is
+ * not available here. Walks the kernel page tables one level at a time
+ * down to the PTE slot for @va. Unlike virt_to_kpte() there is no NULL
+ * check on the pmd, so the caller must have populated the path already.
+ */
+static inline pte_t *__virt_to_kpte(unsigned long va)
+{
+	pgd_t *pgd = pgd_offset_k(va);
+	p4d_t *p4d = p4d_offset(pgd, va);
+	pud_t *pud = pud_offset(p4d, va);
+	pmd_t *pmd = pmd_offset(pud, va);
+
+	return pte_offset_kernel(pmd, va);
+}
+
+/* Built from invalid_pg_dir in setup_vmem() and stored there as the lowcore user_asce. */
+unsigned long __bootdata_preserved(s390_invalid_asce);
+/*
+ * State of the boot page table allocator: pgalloc_pos starts at pgalloc_end
+ * and is moved downwards by boot_alloc_pages(); dropping below pgalloc_low
+ * is reported as out of memory by boot_check_oom().
+ */
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
+
+/* Selects how _pa() derives the address stored in a page table entry. */
+enum populate_mode {
+ POPULATE_NONE, /* _pa() returns -1 */
+ POPULATE_ONE2ONE, /* _pa() returns the address unchanged */
+ POPULATE_ABS_LOWCORE, /* _pa() returns __abs_lowcore_pa(addr) */
+};
+
+/*
+ * Stop the boot through error() once the allocation cursor has moved below
+ * the lowest address the page table allocator is allowed to use.
+ */
+static void boot_check_oom(void)
+{
+	if (pgalloc_pos >= pgalloc_low)
+		return;
+
+	error("out of memory on boot\n");
+}
+
+/*
+ * Set up the bounds of the boot page table allocator.
+ *
+ * The lower bound is the page-aligned end of the kernel image including
+ * .bss, raised to the segment-aligned end of the initrd when initrd support
+ * is configured. The upper bound, which is also the starting position, is
+ * the page-aligned end of detected memory.
+ */
+static void pgtable_populate_init(void)
+{
+	unsigned long low;
+
+	low = round_up(vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size,
+		       PAGE_SIZE);
+	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
+		unsigned long initrd_top;
+
+		initrd_top = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
+		low = max(low, initrd_top);
+	}
+	pgalloc_low = low;
+
+	pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE);
+	pgalloc_pos = pgalloc_end;
+
+	boot_check_oom();
+}
+
+/*
+ * Carve 2^order pages off the top of the remaining allocator area.
+ *
+ * The cursor is moved down by the allocation size and then rounded down to
+ * a multiple of that size, so the returned area is naturally aligned.
+ * Exhaustion is handled by boot_check_oom().
+ */
+static void *boot_alloc_pages(unsigned int order)
+{
+	unsigned long bytes = PAGE_SIZE << order;
+
+	pgalloc_pos = round_down(pgalloc_pos - bytes, bytes);
+	boot_check_oom();
+
+	return (void *)pgalloc_pos;
+}
+
+/*
+ * Allocate a table of CRST_ALLOC_ORDER pages and initialise every entry
+ * with @val via crst_table_init(). Returns the table address.
+ */
+static void *boot_crst_alloc(unsigned long val)
+{
+	unsigned long *crst = boot_alloc_pages(CRST_ALLOC_ORDER);
+
+	if (!crst)
+		return crst;
+
+	crst_table_init(crst, val);
+	return crst;
+}
+
+/*
+ * Hand out one page table with every entry set to _PAGE_INVALID.
+ *
+ * Two page tables fit into one page (checked at build time). A fresh page
+ * is allocated on every other call: that call returns the upper half and
+ * remembers the page start, and the following call returns the remembered
+ * lower half.
+ */
+static pte_t *boot_pte_alloc(void)
+{
+	static void *spare_half;
+	pte_t *table;
+
+	BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
+
+	if (spare_half) {
+		table = spare_half;
+		spare_half = NULL;
+	} else {
+		spare_half = boot_alloc_pages(0);
+		table = spare_half + _PAGE_TABLE_SIZE;
+	}
+
+	memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+	return table;
+}
+
+/*
+ * Translate @addr into the address to store in a page table entry:
+ * unchanged for POPULATE_ONE2ONE, __abs_lowcore_pa(addr) for
+ * POPULATE_ABS_LOWCORE, and -1 for POPULATE_NONE or any other mode.
+ */
+static unsigned long _pa(unsigned long addr, enum populate_mode mode)
+{
+	if (mode == POPULATE_ONE2ONE)
+		return addr;
+	if (mode == POPULATE_ABS_LOWCORE)
+		return __abs_lowcore_pa(addr);
+
+	return -1;
+}
+
+/*
+ * A PUD-sized mapping is possible only when machine.has_edat2 is set,
+ * @addr is PUD_SIZE aligned and the range [addr, end) is at least
+ * PUD_SIZE long. @pu_dir is not consulted.
+ */
+static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
+{
+	if (!machine.has_edat2)
+		return false;
+	if (!IS_ALIGNED(addr, PUD_SIZE))
+		return false;
+
+	return (end - addr) >= PUD_SIZE;
+}
+
+/*
+ * A PMD-sized mapping is possible only when machine.has_edat1 is set,
+ * @addr is PMD_SIZE aligned and the range [addr, end) is at least
+ * PMD_SIZE long. @pm_dir is not consulted.
+ */
+static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
+{
+	if (!machine.has_edat1)
+		return false;
+	if (!IS_ALIGNED(addr, PMD_SIZE))
+		return false;
+
+	return (end - addr) >= PMD_SIZE;
+}
+
+/*
+ * Fill the PTEs covering [addr, end) in the page table that @pmd points to.
+ *
+ * Only slots for which pte_none() holds are written, so existing entries
+ * are never overwritten. Each new entry is the address returned by
+ * _pa(addr, mode) combined with the PAGE_KERNEL_EXEC bits.
+ */
+static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	pte_t *pte, entry;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; addr += PAGE_SIZE, pte++) {
+		if (pte_none(*pte)) {
+			entry = __pte(_pa(addr, mode));
+			entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+			set_pte(pte, entry);
+		}
+	}
+}
+
+/*
+ * Populate the segment table entries covering [addr, end) below @pud.
+ *
+ * An empty entry becomes a large mapping when can_large_pmd() allows it,
+ * otherwise it gets a new page table. Entries that are already large are
+ * skipped; all remaining entries are descended into at PTE level.
+ */
+static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long next;
+	pmd_t *pmd, large;
+	pte_t *page_table;
+
+	for (pmd = pmd_offset(pud, addr); addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (pmd_large(*pmd))
+				continue;
+		} else if (can_large_pmd(pmd, addr, next)) {
+			large = __pmd(_pa(addr, mode));
+			large = set_pmd_bit(large, SEGMENT_KERNEL_EXEC);
+			set_pmd(pmd, large);
+			continue;
+		} else {
+			page_table = boot_pte_alloc();
+			pmd_populate(&init_mm, pmd, page_table);
+		}
+		pgtable_pte_populate(pmd, addr, next, mode);
+	}
+}
+
+/*
+ * Populate the region-third table entries covering [addr, end) below @p4d.
+ *
+ * An empty entry becomes a large mapping when can_large_pud() allows it,
+ * otherwise it gets a new segment table. Entries that are already large
+ * are skipped; all remaining entries are descended into at PMD level.
+ */
+static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long next;
+	pud_t *pud, large;
+	pmd_t *seg_table;
+
+	for (pud = pud_offset(p4d, addr); addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_large(*pud))
+				continue;
+		} else if (can_large_pud(pud, addr, next)) {
+			large = __pud(_pa(addr, mode));
+			large = set_pud_bit(large, REGION3_KERNEL_EXEC);
+			set_pud(pud, large);
+			continue;
+		} else {
+			seg_table = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			pud_populate(&init_mm, pud, seg_table);
+		}
+		pgtable_pmd_populate(pud, addr, next, mode);
+	}
+}
+
+/*
+ * Populate the p4d entries covering [addr, end) below @pgd. Empty entries
+ * receive a new table initialised with _REGION3_ENTRY_EMPTY; every entry
+ * is then descended into at PUD level.
+ */
+static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
+				 enum populate_mode mode)
+{
+	unsigned long next;
+	p4d_t *p4d;
+	pud_t *r3_table;
+
+	p4d = p4d_offset(pgd, addr);
+	while (addr < end) {
+		next = p4d_addr_end(addr, end);
+		if (p4d_none(*p4d)) {
+			r3_table = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
+			p4d_populate(&init_mm, p4d, r3_table);
+		}
+		pgtable_pud_populate(p4d, addr, next, mode);
+
+		addr = next;
+		p4d++;
+	}
+}
+
+/*
+ * Create page table entries for the range [addr, end) in init_mm, using
+ * @mode to pick the address stored in each leaf entry (see _pa()). Empty
+ * top-level entries receive a new table initialised with
+ * _REGION2_ENTRY_EMPTY before the walk continues at p4d level.
+ */
+static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
+{
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *r2_table;
+
+	pgd = pgd_offset(&init_mm, addr);
+	while (addr < end) {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(*pgd)) {
+			r2_table = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
+			pgd_populate(&init_mm, pgd, r2_table);
+		}
+		pgtable_p4d_populate(pgd, addr, next, mode);
+
+		addr = next;
+		pgd++;
+	}
+}
+
+/*
+ * Build the initial kernel page tables and install them.
+ *
+ * @asce_limit: value returned by setup_kernel_memory_layout(); when it
+ *              equals _REGION1_SIZE the top-level table is set up as a
+ *              region-second table, otherwise as a region-third table.
+ *
+ * Initialises swapper_pg_dir and invalid_pg_dir, maps the lowcore, all
+ * detected memory blocks, the absolute lowcore area and the memcpy_real
+ * area, records the kernel and user ASCEs in the lowcore, loads them via
+ * __ctl_load() and stores the kernel ASCE in init_mm.context.asce.
+ */
+void setup_vmem(unsigned long asce_limit)
+{
+ unsigned long start, end;
+ unsigned long asce_type;
+ unsigned long asce_bits;
+ int i;
+
+ /* Pick the empty-entry pattern and ASCE type bits for the top-level table. */
+ if (asce_limit == _REGION1_SIZE) {
+ asce_type = _REGION2_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ } else {
+ asce_type = _REGION3_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ }
+ /* The invalid ASCE is always built as a region-third table type. */
+ s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+
+ crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
+ crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+
+ /*
+ * To allow prefixing the lowcore must be mapped with 4KB pages.
+ * To prevent creation of a large page at address 0 first map
+ * the lowcore and create the identity mapping only afterwards.
+ */
+ pgtable_populate_init();
+ pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
+ for_each_mem_detect_block(i, &start, &end)
+ pgtable_populate(start, end, POPULATE_ONE2ONE);
+ pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
+ POPULATE_ABS_LOWCORE);
+ /* Only the table path is created here; the leaf gets the POPULATE_NONE address (-1). */
+ pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
+ POPULATE_NONE);
+ memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
+
+ S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
+ S390_lowcore.user_asce = s390_invalid_asce;
+
+ /* NOTE(review): presumably loads control registers 1, 7 and 13 - confirm against __ctl_load(). */
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+
+ init_mm.context.asce = S390_lowcore.kernel_asce;
+}
+
+/*
+ * Estimate how much memory the page tables for @online_mem_total bytes of
+ * memory may take: one page table per _PAGE_ENTRIES pages (rounded up),
+ * with the resulting size doubled.
+ */
+unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total)
+{
+	unsigned long nr_pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE);
+	unsigned long nr_tables = DIV_ROUND_UP(nr_pages, _PAGE_ENTRIES);
+
+	return nr_tables * _PAGE_TABLE_SIZE * 2;
+}