summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig67
-rw-r--r--arch/s390/Makefile15
-rw-r--r--arch/s390/boot/Makefile9
-rw-r--r--arch/s390/boot/boot.h14
-rw-r--r--arch/s390/boot/decompressor.c15
-rw-r--r--arch/s390/boot/decompressor.h8
-rw-r--r--arch/s390/boot/kaslr.c2
-rw-r--r--arch/s390/boot/pgm_check_info.c4
-rw-r--r--arch/s390/boot/startup.c263
-rw-r--r--arch/s390/boot/vmem.c108
-rw-r--r--arch/s390/boot/vmlinux.lds.S28
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--arch/s390/crypto/paes_s390.c15
-rw-r--r--arch/s390/include/asm/ap.h30
-rw-r--r--arch/s390/include/asm/asm-prototypes.h1
-rw-r--r--arch/s390/include/asm/atomic.h44
-rw-r--r--arch/s390/include/asm/atomic_ops.h22
-rw-r--r--arch/s390/include/asm/chsc.h15
-rw-r--r--arch/s390/include/asm/dwarf.h1
-rw-r--r--arch/s390/include/asm/extmem.h7
-rw-r--r--arch/s390/include/asm/ftrace.h8
-rw-r--r--arch/s390/include/asm/gmap.h2
-rw-r--r--arch/s390/include/asm/hugetlb.h6
-rw-r--r--arch/s390/include/asm/io.h15
-rw-r--r--arch/s390/include/asm/mmu.h5
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/nospec-branch.h20
-rw-r--r--arch/s390/include/asm/nospec-insn.h13
-rw-r--r--arch/s390/include/asm/os_info.h29
-rw-r--r--arch/s390/include/asm/page.h50
-rw-r--r--arch/s390/include/asm/pgtable.h27
-rw-r--r--arch/s390/include/asm/physmem_info.h4
-rw-r--r--arch/s390/include/asm/preempt.h36
-rw-r--r--arch/s390/include/asm/setup.h14
-rw-r--r--arch/s390/include/asm/vdso/gettimeofday.h7
-rw-r--r--arch/s390/include/asm/vtime.h2
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/cert_store.c1
-rw-r--r--arch/s390/kernel/crash_dump.c41
-rw-r--r--arch/s390/kernel/entry.S4
-rw-r--r--arch/s390/kernel/ftrace.c7
-rw-r--r--arch/s390/kernel/ipl.c7
-rw-r--r--arch/s390/kernel/irq.c1
-rw-r--r--arch/s390/kernel/kprobes.c4
-rw-r--r--arch/s390/kernel/module.c42
-rw-r--r--arch/s390/kernel/nmi.c1
-rw-r--r--arch/s390/kernel/nospec-branch.c4
-rw-r--r--arch/s390/kernel/os_info.c29
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c11
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c10
-rw-r--r--arch/s390/kernel/perf_pai_ext.c10
-rw-r--r--arch/s390/kernel/setup.c6
-rw-r--r--arch/s390/kernel/stacktrace.c19
-rw-r--r--arch/s390/kernel/syscalls/Makefile4
-rw-r--r--arch/s390/kernel/uv.c51
-rw-r--r--arch/s390/kernel/vdso32/Makefile12
-rw-r--r--arch/s390/kernel/vdso64/Makefile12
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S2
-rw-r--r--arch/s390/kernel/vmcore_info.c2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S38
-rw-r--r--arch/s390/kvm/kvm-s390.c6
-rw-r--r--arch/s390/kvm/vsie.c5
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/expoline.S (renamed from arch/s390/lib/expoline/expoline.S)0
-rw-r--r--arch/s390/lib/expoline/Makefile3
-rw-r--r--arch/s390/mm/fault.c5
-rw-r--r--arch/s390/mm/gmap.c167
-rw-r--r--arch/s390/mm/hugetlbpage.c21
-rw-r--r--arch/s390/mm/init.c30
-rw-r--r--arch/s390/mm/mmap.c9
-rw-r--r--arch/s390/mm/vmem.c5
-rw-r--r--arch/s390/net/bpf_jit_comp.c60
-rw-r--r--arch/s390/pci/pci.c6
-rw-r--r--arch/s390/pci/pci_mmio.c4
-rw-r--r--arch/s390/pci/pci_sysfs.c4
-rw-r--r--arch/s390/purgatory/Makefile8
-rw-r--r--arch/s390/tools/relocs.c2
79 files changed, 903 insertions, 665 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8f01ada6845e..c59d2b54df49 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -17,6 +17,9 @@ config ARCH_HAS_ILOG2_U32
config ARCH_HAS_ILOG2_U64
def_bool n
+config ARCH_PROC_KCORE_TEXT
+ def_bool y
+
config GENERIC_HWEIGHT
def_bool y
@@ -174,7 +177,7 @@ config S390
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
- select HAVE_FAST_GUP
+ select HAVE_GUP_FAST
select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
@@ -552,7 +555,7 @@ config EXPOLINE
If unsure, say N.
config EXPOLINE_EXTERN
- def_bool n
+ def_bool y if EXPOLINE
depends on EXPOLINE
depends on CC_IS_GCC && GCC_VERSION >= 110200
depends on $(success,$(srctree)/arch/s390/tools/gcc-thunk-extern.sh $(CC))
@@ -590,18 +593,6 @@ config RELOCATABLE
Note: this option exists only for documentation purposes, please do
not remove it.
-config PIE_BUILD
- def_bool CC_IS_CLANG && !$(cc-option,-munaligned-symbols)
- help
- If the compiler is unable to generate code that can manage unaligned
- symbols, the kernel is linked as a position-independent executable
- (PIE) and includes dynamic relocations that are processed early
- during bootup.
-
- For kpatch functionality, it is recommended to build the kernel
- without the PIE_BUILD option. PIE_BUILD is only enabled when the
- compiler lacks proper support for handling unaligned symbols.
-
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image (KASLR)"
default y
@@ -611,6 +602,25 @@ config RANDOMIZE_BASE
as a security feature that deters exploit attempts relying on
knowledge of the location of kernel internals.
+config KERNEL_IMAGE_BASE
+ hex "Kernel image base address"
+ range 0x100000 0x1FFFFFE0000000 if !KASAN
+ range 0x100000 0x1BFFFFE0000000 if KASAN
+ default 0x3FFE0000000 if !KASAN
+ default 0x7FFFE0000000 if KASAN
+ help
+ This is the address at which the kernel image is loaded in case
+ Kernel Address Space Layout Randomization (KASLR) is disabled.
+
+ In case the Protected virtualization guest support is enabled the
+ Ultravisor imposes a virtual address limit. If the value of this
+ option leads to the kernel image exceeding the Ultravisor limit,
+ this option is ignored and the image is loaded below the limit.
+
+ If the value of this option leads to the kernel image overlapping
+ the virtual memory where other data structures are located, this
+ option is ignored and the image is loaded above the structures.
+
endmenu
menu "Memory setup"
@@ -724,6 +734,33 @@ config EADM_SCH
To compile this driver as a module, choose M here: the
module will be called eadm_sch.
+config AP
+ def_tristate y
+ prompt "Support for Adjunct Processors (ap)"
+ help
+ This driver allows usage to Adjunct Processor (AP) devices via
+ the ap bus, cards and queues. Supported Adjunct Processors are
+ the CryptoExpress Cards (CEX).
+
+ To compile this driver as a module, choose M here: the
+ module will be called ap.
+
+ If unsure, say Y (default).
+
+config AP_DEBUG
+ def_bool n
+ prompt "Enable debug features for Adjunct Processor (ap) devices"
+ depends on AP
+ help
+ Say 'Y' here to enable some additional debug features for Adjunct
+ Processor (ap) devices.
+
+ There will be some more sysfs attributes displayed for ap queues.
+
+ Do not enable on production level kernel build.
+
+ If unsure, say N.
+
config VFIO_CCW
def_tristate n
prompt "Support for VFIO-CCW subchannels"
@@ -740,7 +777,7 @@ config VFIO_AP
prompt "VFIO support for AP devices"
depends on KVM
depends on VFIO
- depends on ZCRYPT
+ depends on AP
select VFIO_MDEV
help
This driver grants access to Adjunct Processor (AP) devices
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 2dbb2d2f22f9..f2b21c7a70ef 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -14,14 +14,9 @@ KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
-ifdef CONFIG_PIE_BUILD
-KBUILD_CFLAGS += -fPIE
-LDFLAGS_vmlinux := -pie -z notext
-else
-KBUILD_CFLAGS += $(call cc-option,-munaligned-symbols,)
-LDFLAGS_vmlinux := --emit-relocs --discard-none
+KBUILD_CFLAGS += -fPIC
+LDFLAGS_vmlinux := -no-pie --emit-relocs --discard-none
extra_tools := relocs
-endif
aflags_dwarf := -Wa,-gdwarf-2
KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
ifndef CONFIG_AS_IS_LLVM
@@ -88,7 +83,6 @@ endif
ifdef CONFIG_EXPOLINE
ifdef CONFIG_EXPOLINE_EXTERN
- KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern
else
@@ -167,11 +161,6 @@ vdso_prepare: prepare0
vdso-install-y += arch/s390/kernel/vdso64/vdso64.so.dbg
vdso-install-$(CONFIG_COMPAT) += arch/s390/kernel/vdso32/vdso32.so.dbg
-ifdef CONFIG_EXPOLINE_EXTERN
-modules_prepare: expoline_prepare
-expoline_prepare: scripts
- $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
-endif
endif
# Don't use tabs in echo arguments
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 294f08a8811a..070c9b2e905f 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,8 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o
-obj-y += $(if $(CONFIG_PIE_BUILD),machine_kexec_reloc.o,relocs.o)
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
@@ -49,9 +48,7 @@ targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y
targets += vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
targets += vmlinux.bin.zst info.bin syms.bin vmlinux.syms $(obj-all)
-ifndef CONFIG_PIE_BUILD
targets += relocs.S
-endif
OBJECTS := $(addprefix $(obj)/,$(obj-y))
OBJECTS_ALL := $(addprefix $(obj)/,$(obj-all))
@@ -110,13 +107,11 @@ OBJCOPYFLAGS_vmlinux.bin := -O binary --remove-section=.comment --remove-section
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-ifndef CONFIG_PIE_BUILD
CMD_RELOCS=arch/s390/tools/relocs
-quiet_cmd_relocs = RELOCS $@
+quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(CMD_RELOCS) $< > $@
$(obj)/relocs.S: vmlinux FORCE
$(call if_changed,relocs)
-endif
suffix-$(CONFIG_KERNEL_GZIP) := .gz
suffix-$(CONFIG_KERNEL_BZIP2) := .bz2
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 567d60f78bbc..18027fdc92b0 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -17,7 +17,6 @@ struct machine_info {
};
struct vmlinux_info {
- unsigned long default_lma;
unsigned long entry;
unsigned long image_size; /* does not include .bss */
unsigned long bss_size; /* uncompressed image .bss size */
@@ -25,14 +24,8 @@ struct vmlinux_info {
unsigned long bootdata_size;
unsigned long bootdata_preserved_off;
unsigned long bootdata_preserved_size;
-#ifdef CONFIG_PIE_BUILD
- unsigned long dynsym_start;
- unsigned long rela_dyn_start;
- unsigned long rela_dyn_end;
-#else
unsigned long got_start;
unsigned long got_end;
-#endif
unsigned long amode31_size;
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
@@ -74,10 +67,11 @@ void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
unsigned long randomize_within_range(unsigned long size, unsigned long align,
unsigned long min, unsigned long max);
-void setup_vmem(unsigned long asce_limit);
+void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m);
+int get_random(unsigned long limit, unsigned long *value);
extern struct machine_info machine;
@@ -98,6 +92,10 @@ extern struct vmlinux_info _vmlinux_info;
#define vmlinux _vmlinux_info
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
+#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset))
+#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys)
+#define __identity_va(x) ((void *)((unsigned long)(x) + __identity_base))
+#define __identity_pa(x) ((unsigned long)(x) - __identity_base)
static inline bool intersects(unsigned long addr0, unsigned long size0,
unsigned long addr1, unsigned long size1)
diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c
index d762733a0753..f478e8e9cbda 100644
--- a/arch/s390/boot/decompressor.c
+++ b/arch/s390/boot/decompressor.c
@@ -63,24 +63,13 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#include "../../../../lib/decompress_unzstd.c"
#endif
-#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
-
unsigned long mem_safe_offset(void)
{
- /*
- * due to 4MB HEAD_SIZE for bzip2
- * 'decompress_offset + vmlinux.image_size' could be larger than
- * kernel at final position + its .bss, so take the larger of two
- */
- return max(decompress_offset + vmlinux.image_size,
- vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size);
+ return ALIGN(free_mem_end_ptr, PAGE_SIZE);
}
-void *decompress_kernel(void)
+void deploy_kernel(void *output)
{
- void *output = (void *)decompress_offset;
-
__decompress(_compressed_start, _compressed_end - _compressed_start,
NULL, NULL, output, vmlinux.image_size, NULL, error);
- return output;
}
diff --git a/arch/s390/boot/decompressor.h b/arch/s390/boot/decompressor.h
index 92b81d2ea35d..4f966f06bd65 100644
--- a/arch/s390/boot/decompressor.h
+++ b/arch/s390/boot/decompressor.h
@@ -2,11 +2,9 @@
#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
#define BOOT_COMPRESSED_DECOMPRESSOR_H
-#ifdef CONFIG_KERNEL_UNCOMPRESSED
-static inline void *decompress_kernel(void) { return NULL; }
-#else
-void *decompress_kernel(void);
-#endif
+#ifndef CONFIG_KERNEL_UNCOMPRESSED
unsigned long mem_safe_offset(void);
+void deploy_kernel(void *output);
+#endif
#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 90602101e2ae..bd3bf5ef472d 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -43,7 +43,7 @@ static int check_prng(void)
return PRNG_MODE_TDES;
}
-static int get_random(unsigned long limit, unsigned long *value)
+int get_random(unsigned long limit, unsigned long *value)
{
struct prng_parm prng = {
/* initial parameter block for tdes mode, copied from libica */
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 97244cd7a206..ea96275b0380 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -153,8 +153,10 @@ void print_pgm_check_info(void)
decompressor_printk("Kernel command line: %s\n", early_command_line);
decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n",
S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1);
- if (kaslr_enabled())
+ if (kaslr_enabled()) {
decompressor_printk("Kernel random base: %lx\n", __kaslr_offset);
+ decompressor_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys);
+ }
decompressor_printk("PSW : %016lx %016lx (%pS)\n",
S390_lowcore.psw_save_area.mask,
S390_lowcore.psw_save_area.addr,
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 6cf89314209a..467283b112cd 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -3,6 +3,7 @@
#include <linux/elf.h>
#include <asm/page-states.h>
#include <asm/boot_data.h>
+#include <asm/extmem.h>
#include <asm/sections.h>
#include <asm/maccess.h>
#include <asm/cpu_mf.h>
@@ -18,7 +19,7 @@
#include "boot.h"
#include "uv.h"
-unsigned long __bootdata_preserved(__kaslr_offset);
+struct vm_layout __bootdata_preserved(vm_layout);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata_preserved(__memcpy_real_area);
pte_t *__bootdata_preserved(memcpy_real_ptep);
@@ -29,7 +30,6 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
-unsigned long __bootdata(ident_map_size);
u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
@@ -109,9 +109,19 @@ static void setup_lpp(void)
}
#ifdef CONFIG_KERNEL_UNCOMPRESSED
-unsigned long mem_safe_offset(void)
+static unsigned long mem_safe_offset(void)
{
- return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+ return (unsigned long)_compressed_start;
+}
+
+static void deploy_kernel(void *output)
+{
+ void *uncompressed_start = (void *)_compressed_start;
+
+ if (output == uncompressed_start)
+ return;
+ memmove(output, uncompressed_start, vmlinux.image_size);
+ memset(uncompressed_start, 0, vmlinux.image_size);
}
#endif
@@ -141,70 +151,18 @@ static void copy_bootdata(void)
memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
}
-#ifdef CONFIG_PIE_BUILD
-static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
-{
- Elf64_Rela *rela_start, *rela_end, *rela;
- int r_type, r_sym, rc;
- Elf64_Addr loc, val;
- Elf64_Sym *dynsym;
-
- rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
- rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
- dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
- for (rela = rela_start; rela < rela_end; rela++) {
- loc = rela->r_offset + offset;
- val = rela->r_addend;
- r_sym = ELF64_R_SYM(rela->r_info);
- if (r_sym) {
- if (dynsym[r_sym].st_shndx != SHN_UNDEF)
- val += dynsym[r_sym].st_value + offset;
- } else {
- /*
- * 0 == undefined symbol table index (STN_UNDEF),
- * used for R_390_RELATIVE, only add KASLR offset
- */
- val += offset;
- }
- r_type = ELF64_R_TYPE(rela->r_info);
- rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
- if (rc)
- error("Unknown relocation type");
- }
-}
-
-static void kaslr_adjust_got(unsigned long offset) {}
-static void rescue_relocs(void) {}
-static void free_relocs(void) {}
-#else
-static int *vmlinux_relocs_64_start;
-static int *vmlinux_relocs_64_end;
-
-static void rescue_relocs(void)
-{
- unsigned long size = __vmlinux_relocs_64_end - __vmlinux_relocs_64_start;
-
- vmlinux_relocs_64_start = (void *)physmem_alloc_top_down(RR_RELOC, size, 0);
- vmlinux_relocs_64_end = (void *)vmlinux_relocs_64_start + size;
- memmove(vmlinux_relocs_64_start, __vmlinux_relocs_64_start, size);
-}
-
-static void free_relocs(void)
-{
- physmem_free(RR_RELOC);
-}
-
-static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, unsigned long offset)
+static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
+ unsigned long offset, unsigned long phys_offset)
{
int *reloc;
long loc;
/* Adjust R_390_64 relocations */
- for (reloc = vmlinux_relocs_64_start; reloc < vmlinux_relocs_64_end; reloc++) {
- loc = (long)*reloc + offset;
+ for (reloc = (int *)__vmlinux_relocs_64_start; reloc < (int *)__vmlinux_relocs_64_end; reloc++) {
+ loc = (long)*reloc + phys_offset;
if (loc < min_addr || loc > max_addr)
error("64-bit relocation outside of kernel!\n");
- *(u64 *)loc += offset;
+ *(u64 *)loc += offset - __START_KERNEL;
}
}
@@ -217,9 +175,8 @@ static void kaslr_adjust_got(unsigned long offset)
* reason. Adjust the GOT entries.
*/
for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
- *entry += offset;
+ *entry += offset - __START_KERNEL;
}
-#endif
/*
* Merge information from several sources into a single ident_map_size value.
@@ -261,9 +218,26 @@ static void setup_ident_map_size(unsigned long max_physmem_end)
#endif
}
-static unsigned long setup_kernel_memory_layout(void)
+#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
+
+static unsigned long get_vmem_size(unsigned long identity_size,
+ unsigned long vmemmap_size,
+ unsigned long vmalloc_size,
+ unsigned long rte_size)
+{
+ unsigned long max_mappable, vsize;
+
+ max_mappable = max(identity_size, MAX_DCSS_ADDR);
+ vsize = round_up(SZ_2G + max_mappable, rte_size) +
+ round_up(vmemmap_size, rte_size) +
+ FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
+ return size_add(vsize, vmalloc_size);
+}
+
+static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
{
unsigned long vmemmap_start;
+ unsigned long kernel_start;
unsigned long asce_limit;
unsigned long rte_size;
unsigned long pages;
@@ -275,12 +249,19 @@ static unsigned long setup_kernel_memory_layout(void)
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */
- vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size +
- MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE;
- vsize = size_add(vsize, vmalloc_size);
- if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) {
+ BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
+ BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
+ vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
+ if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
+ (vsize > _REGION2_SIZE && kaslr_enabled())) {
asce_limit = _REGION1_SIZE;
- rte_size = _REGION2_SIZE;
+ if (__NO_KASLR_END_KERNEL > _REGION2_SIZE) {
+ rte_size = _REGION2_SIZE;
+ vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION2_SIZE);
+ } else {
+ rte_size = _REGION3_SIZE;
+ }
} else {
asce_limit = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
@@ -290,38 +271,67 @@ static unsigned long setup_kernel_memory_layout(void)
* Forcing modules and vmalloc area under the ultravisor
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
+ *
+ * Assume the secure storage limit always exceeds _REGION2_SIZE,
+ * otherwise asce_limit and rte_size would have been adjusted.
*/
vmax = adjust_to_uv_max(asce_limit);
#ifdef CONFIG_KASAN
+ BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
- __memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE);
- __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
- sizeof(struct lowcore));
- MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
+ vsize = min(vsize, vmax);
+ if (kaslr_enabled()) {
+ unsigned long kernel_end, kaslr_len, slots, pos;
+
+ kaslr_len = max(KASLR_LEN, vmax - vsize);
+ slots = DIV_ROUND_UP(kaslr_len - kernel_size, THREAD_SIZE);
+ if (get_random(slots, &pos))
+ pos = 0;
+ kernel_end = vmax - pos * THREAD_SIZE;
+ kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
+ } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
+ kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
+ decompressor_printk("The kernel base address is forced to %lx\n", kernel_start);
+ } else {
+ kernel_start = __NO_KASLR_START_KERNEL;
+ }
+ __kaslr_offset = kernel_start;
+
+ MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
- vsize = round_down(VMALLOC_END / 2, _SEGMENT_SIZE);
+ vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
+ vsize = round_down(vsize, _SEGMENT_SIZE);
vmalloc_size = min(vmalloc_size, vsize);
VMALLOC_START = VMALLOC_END - vmalloc_size;
+ __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
+ __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+ sizeof(struct lowcore));
+
/* split remaining virtual space between 1:1 mapping & vmemmap array */
- pages = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
pages = SECTION_ALIGN_UP(pages);
/* keep vmemmap_start aligned to a top level region table entry */
- vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
- vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
- /* maximum mappable address as seen by arch_get_mappable_range() */
- max_mappable = vmemmap_start;
+ vmemmap_start = round_down(__abs_lowcore - pages * sizeof(struct page), rte_size);
/* make sure identity map doesn't overlay with vmemmap */
ident_map_size = min(ident_map_size, vmemmap_start);
vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
- /* make sure vmemmap doesn't overlay with vmalloc area */
- VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
+ /* make sure vmemmap doesn't overlay with absolute lowcore area */
+ if (vmemmap_start + vmemmap_size > __abs_lowcore) {
+ vmemmap_size = SECTION_ALIGN_DOWN(ident_map_size / PAGE_SIZE) * sizeof(struct page);
+ ident_map_size = vmemmap_size / sizeof(struct page) * PAGE_SIZE;
+ }
vmemmap = (struct page *)vmemmap_start;
+ /* maximum address for which linear mapping could be created (DCSS, memory) */
+ BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
+ max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
+ max_mappable = min(max_mappable, vmemmap_start);
+ __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
return asce_limit;
}
@@ -329,9 +339,9 @@ static unsigned long setup_kernel_memory_layout(void)
/*
* This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
*/
-static void clear_bss_section(unsigned long vmlinux_lma)
+static void clear_bss_section(unsigned long kernel_start)
{
- memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size);
+ memset((void *)kernel_start + vmlinux.image_size, 0, vmlinux.bss_size);
}
/*
@@ -348,19 +358,12 @@ static void setup_vmalloc_size(void)
vmalloc_size = max(size, vmalloc_size);
}
-static void kaslr_adjust_vmlinux_info(unsigned long offset)
+static void kaslr_adjust_vmlinux_info(long offset)
{
- *(unsigned long *)(&vmlinux.entry) += offset;
vmlinux.bootdata_off += offset;
vmlinux.bootdata_preserved_off += offset;
-#ifdef CONFIG_PIE_BUILD
- vmlinux.rela_dyn_start += offset;
- vmlinux.rela_dyn_end += offset;
- vmlinux.dynsym_start += offset;
-#else
vmlinux.got_start += offset;
vmlinux.got_end += offset;
-#endif
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
@@ -373,23 +376,30 @@ static void kaslr_adjust_vmlinux_info(unsigned long offset)
#endif
}
+static void fixup_vmlinux_info(void)
+{
+ vmlinux.entry -= __START_KERNEL;
+ kaslr_adjust_vmlinux_info(-__START_KERNEL);
+}
+
void startup_kernel(void)
{
- unsigned long max_physmem_end;
- unsigned long vmlinux_lma = 0;
+ unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
+ unsigned long nokaslr_offset_phys = mem_safe_offset();
unsigned long amode31_lma = 0;
+ unsigned long max_physmem_end;
unsigned long asce_limit;
unsigned long safe_addr;
- void *img;
psw_t psw;
+ fixup_vmlinux_info();
setup_lpp();
- safe_addr = mem_safe_offset();
+ safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
/*
- * Reserve decompressor memory together with decompression heap, buffer and
- * memory which might be occupied by uncompressed kernel at default 1Mb
- * position (if KASLR is off or failed).
+ * Reserve decompressor memory together with decompression heap,
+ * buffer and memory which might be occupied by uncompressed kernel
+ * (if KASLR is off or failed).
*/
physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
@@ -409,40 +419,39 @@ void startup_kernel(void)
max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end);
setup_vmalloc_size();
- asce_limit = setup_kernel_memory_layout();
+ asce_limit = setup_kernel_memory_layout(kernel_size);
/* got final ident_map_size, physmem allocations could be performed now */
physmem_set_usable_limit(ident_map_size);
detect_physmem_online_ranges(max_physmem_end);
save_ipl_cert_comp_list();
rescue_initrd(safe_addr, ident_map_size);
- rescue_relocs();
- if (kaslr_enabled()) {
- vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size,
- THREAD_SIZE, vmlinux.default_lma,
- ident_map_size);
- if (vmlinux_lma) {
- __kaslr_offset = vmlinux_lma - vmlinux.default_lma;
- kaslr_adjust_vmlinux_info(__kaslr_offset);
- }
- }
- vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma;
- physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size);
-
- if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
- img = decompress_kernel();
- memmove((void *)vmlinux_lma, img, vmlinux.image_size);
- } else if (__kaslr_offset) {
- img = (void *)vmlinux.default_lma;
- memmove((void *)vmlinux_lma, img, vmlinux.image_size);
- memset(img, 0, vmlinux.image_size);
- }
+ if (kaslr_enabled())
+ __kaslr_offset_phys = randomize_within_range(kernel_size, THREAD_SIZE, 0, ident_map_size);
+ if (!__kaslr_offset_phys)
+ __kaslr_offset_phys = nokaslr_offset_phys;
+ kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
+ physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
+ deploy_kernel((void *)__kaslr_offset_phys);
/* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
+
+ /*
+ * In case KASLR is enabled the randomized location of .amode31
+ * section might overlap with .vmlinux.relocs section. To avoid that
+ * the below randomize_within_range() could have been called with
+ * __vmlinux_relocs_64_end as the lower range address. However,
+ * .amode31 section is written to by the decompressed kernel - at
+ * that time the contents of .vmlinux.relocs is not needed anymore.
+ * Conversly, .vmlinux.relocs is read only by the decompressor, even
+ * before the kernel started. Therefore, in case the two sections
+ * overlap there is no risk of corrupting any data.
+ */
if (kaslr_enabled())
amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
- amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size;
+ if (!amode31_lma)
+ amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/*
@@ -458,23 +467,23 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes
* to bootdata made by setup_vmem()
*/
- clear_bss_section(vmlinux_lma);
- kaslr_adjust_relocs(vmlinux_lma, vmlinux_lma + vmlinux.image_size, __kaslr_offset);
+ clear_bss_section(__kaslr_offset_phys);
+ kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size,
+ __kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
- free_relocs();
- setup_vmem(asce_limit);
+ setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
copy_bootdata();
/*
* Save KASLR offset for early dumps, before vmcore_info is set.
* Mark as uneven to distinguish from real vmcore_info pointer.
*/
- S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0;
+ S390_lowcore.vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
/*
* Jump to the decompressed kernel entry point and switch DAT mode on.
*/
- psw.addr = vmlinux.entry;
+ psw.addr = __kaslr_offset + vmlinux.entry;
psw.mask = PSW_KERNEL_BITS;
__load_psw(psw);
}
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 09b10bb6e4d0..96d48b7112d4 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -27,6 +27,8 @@ enum populate_mode {
POPULATE_NONE,
POPULATE_DIRECT,
POPULATE_ABS_LOWCORE,
+ POPULATE_IDENTITY,
+ POPULATE_KERNEL,
#ifdef CONFIG_KASAN
POPULATE_KASAN_MAP_SHADOW,
POPULATE_KASAN_ZERO_SHADOW,
@@ -54,7 +56,7 @@ static inline void kasan_populate(unsigned long start, unsigned long end, enum p
pgtable_populate(start, end, mode);
}
-static void kasan_populate_shadow(void)
+static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
@@ -76,44 +78,20 @@ static void kasan_populate_shadow(void)
__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat(kasan_early_shadow_pte, 1);
- /*
- * Current memory layout:
- * +- 0 -------------+ +- shadow start -+
- * |1:1 ident mapping| /|1/8 of ident map|
- * | | / | |
- * +-end of ident map+ / +----------------+
- * | ... gap ... | / | kasan |
- * | | / | zero page |
- * +- vmalloc area -+ / | mapping |
- * | vmalloc_size | / | (untracked) |
- * +- modules vaddr -+ / +----------------+
- * | 2Gb |/ | unmapped | allocated per module
- * +- shadow start -+ +----------------+
- * | 1/8 addr space | | zero pg mapping| (untracked)
- * +- shadow end ----+---------+- shadow end ---+
- *
- * Current memory layout (KASAN_VMALLOC):
- * +- 0 -------------+ +- shadow start -+
- * |1:1 ident mapping| /|1/8 of ident map|
- * | | / | |
- * +-end of ident map+ / +----------------+
- * | ... gap ... | / | kasan zero page| (untracked)
- * | | / | mapping |
- * +- vmalloc area -+ / +----------------+
- * | vmalloc_size | / |shallow populate|
- * +- modules vaddr -+ / +----------------+
- * | 2Gb |/ |shallow populate|
- * +- shadow start -+ +----------------+
- * | 1/8 addr space | | zero pg mapping| (untracked)
- * +- shadow end ----+---------+- shadow end ---+
- */
-
for_each_physmem_usable_range(i, &start, &end) {
- kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
- if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260)
- kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate((unsigned long)__identity_va(start),
+ (unsigned long)__identity_va(end),
+ POPULATE_KASAN_MAP_SHADOW);
+ if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
+ kasan_populate((unsigned long)__identity_va(memgap_start),
+ (unsigned long)__identity_va(start),
+ POPULATE_KASAN_ZERO_SHADOW);
+ }
memgap_start = end;
}
+ kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
@@ -122,8 +100,9 @@ static void kasan_populate_shadow(void)
untracked_end = MODULES_VADDR;
}
/* populate kasan shadow for untracked memory */
- kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW);
- kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end,
+ POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
@@ -180,7 +159,9 @@ static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
}
#else
-static inline void kasan_populate_shadow(void) {}
+static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
+{
+}
static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
unsigned long end, enum populate_mode mode)
@@ -263,6 +244,10 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return addr;
case POPULATE_ABS_LOWCORE:
return __abs_lowcore_pa(addr);
+ case POPULATE_KERNEL:
+ return __kernel_pa(addr);
+ case POPULATE_IDENTITY:
+ return __identity_pa(addr);
#ifdef CONFIG_KASAN
case POPULATE_KASAN_MAP_SHADOW:
addr = physmem_alloc_top_down(RR_VMEM, size, size);
@@ -274,15 +259,22 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
}
}
-static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
+static bool large_allowed(enum populate_mode mode)
+{
+ return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY);
+}
+
+static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- return machine.has_edat2 &&
+ return machine.has_edat2 && large_allowed(mode) &&
IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
}
-static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
+static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
+ enum populate_mode mode)
{
- return machine.has_edat1 &&
+ return machine.has_edat1 && large_allowed(mode) &&
IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
}
@@ -322,7 +314,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
if (pmd_none(*pmd)) {
if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
continue;
- if (can_large_pmd(pmd, addr, next)) {
+ if (can_large_pmd(pmd, addr, next, mode)) {
entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
entry = set_pmd_bit(entry, SEGMENT_KERNEL);
if (!machine.has_nx)
@@ -355,7 +347,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
if (pud_none(*pud)) {
if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
continue;
- if (can_large_pud(pud, addr, next)) {
+ if (can_large_pud(pud, addr, next, mode)) {
entry = __pud(_pa(addr, _REGION3_SIZE, mode));
entry = set_pud_bit(entry, REGION3_KERNEL);
if (!machine.has_nx)
@@ -418,11 +410,12 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
}
}
-void setup_vmem(unsigned long asce_limit)
+void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
unsigned long start, end;
unsigned long asce_type;
unsigned long asce_bits;
+ pgd_t *init_mm_pgd;
int i;
/*
@@ -433,6 +426,15 @@ void setup_vmem(unsigned long asce_limit)
for_each_physmem_online_range(i, &start, &end)
__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
+ /*
+ * init_mm->pgd contains virtual address of swapper_pg_dir.
+ * It is unusable at this stage since DAT is yet off. Swap
+ * it for physical address of swapper_pg_dir and restore
+ * the virtual address after all page tables are created.
+ */
+ init_mm_pgd = init_mm.pgd;
+ init_mm.pgd = (pgd_t *)swapper_pg_dir;
+
if (asce_limit == _REGION1_SIZE) {
asce_type = _REGION2_ENTRY_EMPTY;
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@@ -453,15 +455,20 @@ void setup_vmem(unsigned long asce_limit)
* the lowcore and create the identity mapping only afterwards.
*/
pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
- for_each_physmem_usable_range(i, &start, &end)
- pgtable_populate(start, end, POPULATE_DIRECT);
+ for_each_physmem_usable_range(i, &start, &end) {
+ pgtable_populate((unsigned long)__identity_va(start),
+ (unsigned long)__identity_va(end),
+ POPULATE_IDENTITY);
+ }
+ pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL);
+ pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE);
pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
POPULATE_NONE);
- memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area);
+ memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));
- kasan_populate_shadow();
+ kasan_populate_shadow(kernel_start, kernel_end);
S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits;
S390_lowcore.user_asce = s390_invalid_asce;
@@ -471,4 +478,5 @@ void setup_vmem(unsigned long asce_limit)
local_ctl_load(13, &S390_lowcore.kernel_asce);
init_mm.context.asce = S390_lowcore.kernel_asce.val;
+ init_mm.pgd = init_mm_pgd;
}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 3d7ea585ab99..1fe5a1d3ff60 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -99,8 +99,16 @@ SECTIONS
_decompressor_end = .;
+ . = ALIGN(4);
+ .vmlinux.relocs : {
+ __vmlinux_relocs_64_start = .;
+ *(.vmlinux.relocs_64)
+ __vmlinux_relocs_64_end = .;
+ }
+
#ifdef CONFIG_KERNEL_UNCOMPRESSED
- . = 0x100000;
+ . = ALIGN(PAGE_SIZE);
+ . += AMODE31_SIZE; /* .amode31 section */
#else
. = ALIGN(8);
#endif
@@ -110,24 +118,6 @@ SECTIONS
_compressed_end = .;
}
-#ifndef CONFIG_PIE_BUILD
- /*
- * When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
- * uncompressed vmlinux.bin is positioned in the bzImage decompressor
- * image at the default kernel LMA of 0x100000, enabling it to be
- * executed in-place. However, the size of .vmlinux.relocs could be
- * large enough to cause an overlap with the uncompressed kernel at the
- * address 0x100000. To address this issue, .vmlinux.relocs is
- * positioned after the .rodata.compressed.
- */
- . = ALIGN(4);
- .vmlinux.relocs : {
- __vmlinux_relocs_64_start = .;
- *(.vmlinux.relocs_64)
- __vmlinux_relocs_64_end = .;
- }
-#endif
-
#define SB_TRAILER_SIZE 32
/* Trailer needed for Secure Boot */
. += SB_TRAILER_SIZE; /* make sure .sb.trailer does not overwrite the previous section */
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index f1fb01cd5a25..145342e46ea8 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -760,7 +760,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_STATS=y
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index d33f814f78b2..dc237896f99d 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -745,7 +745,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_STATS=y
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 99f7e1f2b70a..99ea3f12c5d2 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -125,8 +125,19 @@ struct s390_pxts_ctx {
static inline int __paes_keyblob2pkey(struct key_blob *kb,
struct pkey_protkey *pk)
{
- return pkey_keyblob2pkey(kb->key, kb->keylen,
- pk->protkey, &pk->len, &pk->type);
+ int i, ret = -EIO;
+
+ /* try three times in case of busy card */
+ for (i = 0; ret && i < 3; i++) {
+ if (ret == -EBUSY && in_task()) {
+ if (msleep_interruptible(1000))
+ return -EINTR;
+ }
+ ret = pkey_keyblob2pkey(kb->key, kb->keylen,
+ pk->protkey, &pk->len, &pk->type);
+ }
+
+ return ret;
}
static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index 43ac4a64f49b..395b02d6a133 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -223,13 +223,18 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid, int fbit)
* config info as returned by the ap_qci() function.
*/
struct ap_config_info {
- unsigned int apsc : 1; /* S bit */
- unsigned int apxa : 1; /* N bit */
- unsigned int qact : 1; /* C bit */
- unsigned int rc8a : 1; /* R bit */
- unsigned int : 4;
- unsigned int apsb : 1; /* B bit */
- unsigned int : 23;
+ union {
+ unsigned int flags;
+ struct {
+ unsigned int apsc : 1; /* S bit */
+ unsigned int apxa : 1; /* N bit */
+ unsigned int qact : 1; /* C bit */
+ unsigned int rc8a : 1; /* R bit */
+ unsigned int : 4;
+ unsigned int apsb : 1; /* B bit */
+ unsigned int : 23;
+ };
+ };
unsigned char na; /* max # of APs - 1 */
unsigned char nd; /* max # of Domains - 1 */
unsigned char _reserved0[10];
@@ -544,15 +549,4 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid,
return reg1.status;
}
-/*
- * Interface to tell the AP bus code that a configuration
- * change has happened. The bus code should at least do
- * an ap bus resource rescan.
- */
-#if IS_ENABLED(CONFIG_ZCRYPT)
-void ap_bus_cfg_chg(void);
-#else
-static inline void ap_bus_cfg_chg(void){}
-#endif
-
#endif /* _ASM_S390_AP_H_ */
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
index 56096ae26f29..f662eb4b9246 100644
--- a/arch/s390/include/asm/asm-prototypes.h
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include <linux/ftrace.h>
#include <asm/fpu.h>
+#include <asm/nospec-branch.h>
#include <asm-generic/asm-prototypes.h>
__int128_t __ashlti3(__int128_t a, int b);
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7138d189cc42..0c4cad7d5a5b 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,31 +15,31 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-static inline int arch_atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
{
return __atomic_read(v);
}
#define arch_atomic_read arch_atomic_read
-static inline void arch_atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__atomic_set(v, i);
}
#define arch_atomic_set arch_atomic_set
-static inline int arch_atomic_add_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter) + i;
}
#define arch_atomic_add_return arch_atomic_add_return
-static inline int arch_atomic_fetch_add(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter);
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static inline void arch_atomic_add(int i, atomic_t *v)
+static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
__atomic_add(i, &v->counter);
}
@@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v)
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
#define ATOMIC_OPS(op) \
-static inline void arch_atomic_##op(int i, atomic_t *v) \
+static __always_inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
__atomic_##op(i, &v->counter); \
} \
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
+static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
return __atomic_##op##_barrier(i, &v->counter); \
}
@@ -74,7 +74,7 @@ ATOMIC_OPS(xor)
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
return __atomic_cmpxchg(&v->counter, old, new);
}
@@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
#define ATOMIC64_INIT(i) { (i) }
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __atomic64_read(v);
}
#define arch_atomic64_read arch_atomic64_read
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__atomic64_set(v, i);
}
#define arch_atomic64_set arch_atomic64_set
-static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter) + i;
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter);
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static inline void arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__atomic64_add(i, (long *)&v->counter);
}
@@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v)
#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return __atomic64_cmpxchg((long *)&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-#define ATOMIC64_OPS(op) \
-static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
-{ \
- __atomic64_##op(i, (long *)&v->counter); \
-} \
-static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
-{ \
- return __atomic64_##op##_barrier(i, (long *)&v->counter); \
+#define ATOMIC64_OPS(op) \
+static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
+{ \
+ __atomic64_##op(i, (long *)&v->counter); \
+} \
+static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
+{ \
+ return __atomic64_##op##_barrier(i, (long *)&v->counter); \
}
ATOMIC64_OPS(and)
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 50510e08b893..7fa5f96a553a 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -8,7 +8,7 @@
#ifndef __ARCH_S390_ATOMIC_OPS__
#define __ARCH_S390_ATOMIC_OPS__
-static inline int __atomic_read(const atomic_t *v)
+static __always_inline int __atomic_read(const atomic_t *v)
{
int c;
@@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v)
return c;
}
-static inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(atomic_t *v, int i)
{
asm volatile(
" st %1,%0\n"
: "=R" (v->counter) : "d" (i));
}
-static inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline s64 __atomic64_read(const atomic64_t *v)
{
s64 c;
@@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v)
return c;
}
-static inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
{
asm volatile(
" stg %1,%0\n"
@@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i)
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
-static inline op_type op_name(op_type val, op_type *ptr) \
+static __always_inline op_type op_name(op_type val, op_type *ptr) \
{ \
op_type old; \
\
@@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#define __ATOMIC_OP(op_name, op_string) \
-static inline int op_name(int val, int *ptr) \
+static __always_inline int op_name(int val, int *ptr) \
{ \
int old, new; \
\
@@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr")
#undef __ATOMIC_OPS
#define __ATOMIC64_OP(op_name, op_string) \
-static inline long op_name(long val, long *ptr) \
+static __always_inline long op_name(long val, long *ptr) \
{ \
long old, new; \
\
@@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new)
{
asm volatile(
" cs %[old],%[new],%[ptr]"
@@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new)
return old;
}
-static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
+static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{
int old_expected = old;
@@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
return old == old_expected;
}
-static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
{
asm volatile(
" csg %[old],%[new],%[ptr]"
@@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
return old;
}
-static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
{
long old_expected = old;
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
index bb48ea380c0d..bb78159d8042 100644
--- a/arch/s390/include/asm/chsc.h
+++ b/arch/s390/include/asm/chsc.h
@@ -11,6 +11,9 @@
#include <uapi/asm/chsc.h>
+/* struct from linux/notifier.h */
+struct notifier_block;
+
/**
* Operation codes for CHSC PNSO:
* PNSO_OC_NET_BRIDGE_INFO - only addresses that are visible to a bridgeport
@@ -66,4 +69,16 @@ struct chsc_pnso_area {
struct chsc_pnso_naid_l2 entries[];
} __packed __aligned(PAGE_SIZE);
+/*
+ * notifier interface - registered notifiers gets called on
+ * the following events:
+ * - ap config changed (CHSC_NOTIFY_AP_CFG)
+ */
+enum chsc_notify_type {
+ CHSC_NOTIFY_AP_CFG = 3,
+};
+
+int chsc_notifier_register(struct notifier_block *nb);
+int chsc_notifier_unregister(struct notifier_block *nb);
+
#endif /* _ASM_S390_CHSC_H */
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
index 4f21ae561e4d..390906b8e386 100644
--- a/arch/s390/include/asm/dwarf.h
+++ b/arch/s390/include/asm/dwarf.h
@@ -9,6 +9,7 @@
#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
#define CFI_RESTORE .cfi_restore
+#define CFI_REL_OFFSET .cfi_rel_offset
#ifdef CONFIG_AS_CFI_VAL_OFFSET
#define CFI_VAL_OFFSET .cfi_val_offset
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
index 568fd81bb77b..e0a06060afdd 100644
--- a/arch/s390/include/asm/extmem.h
+++ b/arch/s390/include/asm/extmem.h
@@ -8,6 +8,13 @@
#define _ASM_S390X_DCSS_H
#ifndef __ASSEMBLY__
+/*
+ * DCSS segment is defined as a contiguous range of pages using DEFSEG command.
+ * The range start and end is a page number with a value less than or equal to
+ * 0x7ffffff (see CP Commands and Utilities Reference).
+ */
+#define MAX_DCSS_ADDR (512UL * SZ_1G)
+
/* possible values for segment type as returned by segment_info */
#define SEG_TYPE_SW 0
#define SEG_TYPE_EW 1
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 621f23d5ae30..77e479d44f1e 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -8,12 +8,8 @@
#ifndef __ASSEMBLY__
-#ifdef CONFIG_CC_IS_CLANG
-/* https://llvm.org/pr41424 */
-#define ftrace_return_address(n) 0UL
-#else
-#define ftrace_return_address(n) __builtin_return_address(n)
-#endif
+unsigned long return_address(unsigned int n);
+#define ftrace_return_address(n) return_address(n)
void ftrace_caller(void);
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 5cc46e0dde62..9725586f4259 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -146,7 +146,7 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int gmap_mark_unmergeable(void);
+int s390_disable_cow_sharing(void);
void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index deb198a61039..ce5f4fe8be4d 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -39,11 +39,11 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
}
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
}
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index 4453ad7c11ac..0fbc992d7a5e 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -73,6 +73,21 @@ static inline void ioport_unmap(void __iomem *p)
#define __raw_writel zpci_write_u32
#define __raw_writeq zpci_write_u64
+/* combine single writes by using store-block insn */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+ size_t count)
+{
+ zpci_memcpy_toio(to, from, count * 4);
+}
+#define __iowrite32_copy __iowrite32_copy
+
+static inline void __iowrite64_copy(void __iomem *to, const void *from,
+ size_t count)
+{
+ zpci_memcpy_toio(to, from, count * 8);
+}
+#define __iowrite64_copy __iowrite64_copy
+
#endif /* CONFIG_PCI */
#include <asm-generic/io.h>
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bb1b4bef1878..4c2dc7abc285 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -32,6 +32,11 @@ typedef struct {
unsigned int uses_skeys:1;
/* The mmu context uses CMM. */
unsigned int uses_cmm:1;
+ /*
+ * The mmu context allows COW-sharing of memory pages (KSM, zeropage).
+ * Note that COW-sharing during fork() is currently always allowed.
+ */
+ unsigned int allow_cow_sharing:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
} mm_context_t;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 929af18b0908..a7789a9f6218 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -35,6 +35,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
+ mm->context.allow_cow_sharing = 1;
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 82725cf783c7..b9c1f3cae842 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -17,6 +17,26 @@ static inline bool nospec_uses_trampoline(void)
return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
}
+#ifdef CONFIG_EXPOLINE_EXTERN
+
+void __s390_indirect_jump_r1(void);
+void __s390_indirect_jump_r2(void);
+void __s390_indirect_jump_r3(void);
+void __s390_indirect_jump_r4(void);
+void __s390_indirect_jump_r5(void);
+void __s390_indirect_jump_r6(void);
+void __s390_indirect_jump_r7(void);
+void __s390_indirect_jump_r8(void);
+void __s390_indirect_jump_r9(void);
+void __s390_indirect_jump_r10(void);
+void __s390_indirect_jump_r11(void);
+void __s390_indirect_jump_r12(void);
+void __s390_indirect_jump_r13(void);
+void __s390_indirect_jump_r14(void);
+void __s390_indirect_jump_r15(void);
+
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index 7a946c42ad13..cb15dd25bf21 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -16,24 +16,25 @@
*/
.macro __THUNK_PROLOG_NAME name
#ifdef CONFIG_EXPOLINE_EXTERN
- .pushsection .text,"ax",@progbits
- __ALIGN
+ SYM_CODE_START(\name)
#else
.pushsection .text.\name,"axG",@progbits,\name,comdat
-#endif
.globl \name
.hidden \name
.type \name,@function
\name:
CFI_STARTPROC
+#endif
.endm
.macro __THUNK_EPILOG_NAME name
- CFI_ENDPROC
#ifdef CONFIG_EXPOLINE_EXTERN
- .size \name, .-\name
-#endif
+ SYM_CODE_END(\name)
+ EXPORT_SYMBOL(\name)
+#else
+ CFI_ENDPROC
.popsection
+#endif
.endm
.macro __THUNK_PROLOG_BR r1
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index a4d2e103f116..3ee9e8f5ceae 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -17,11 +17,25 @@
#define OS_INFO_VMCOREINFO 0
#define OS_INFO_REIPL_BLOCK 1
#define OS_INFO_FLAGS_ENTRY 2
+#define OS_INFO_RESERVED 3
+#define OS_INFO_IDENTITY_BASE 4
+#define OS_INFO_KASLR_OFFSET 5
+#define OS_INFO_KASLR_OFF_PHYS 6
+#define OS_INFO_VMEMMAP 7
+#define OS_INFO_AMODE31_START 8
+#define OS_INFO_AMODE31_END 9
+#define OS_INFO_IMAGE_START 10
+#define OS_INFO_IMAGE_END 11
+#define OS_INFO_IMAGE_PHYS 12
+#define OS_INFO_MAX 13
#define OS_INFO_FLAG_REIPL_CLEAR (1UL << 0)
struct os_info_entry {
- u64 addr;
+ union {
+ u64 addr;
+ u64 val;
+ };
u64 size;
u32 csum;
} __packed;
@@ -33,17 +47,24 @@ struct os_info {
u16 version_minor;
u64 crashkernel_addr;
u64 crashkernel_size;
- struct os_info_entry entry[3];
- u8 reserved[4004];
+ struct os_info_entry entry[OS_INFO_MAX];
+ u8 reserved[3804];
} __packed;
void os_info_init(void);
-void os_info_entry_add(int nr, void *ptr, u64 len);
+void os_info_entry_add_data(int nr, void *ptr, u64 len);
+void os_info_entry_add_val(int nr, u64 val);
void os_info_crashkernel_add(unsigned long base, unsigned long size);
u32 os_info_csum(struct os_info *os_info);
#ifdef CONFIG_CRASH_DUMP
void *os_info_old_entry(int nr, unsigned long *size);
+static inline unsigned long os_info_old_value(int nr)
+{
+ unsigned long size;
+
+ return (unsigned long)os_info_old_entry(nr, &size);
+}
#else
static inline void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 9381879f7ecf..224ff9d433ea 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -178,19 +178,52 @@ int arch_make_page_accessible(struct page *page);
#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
#endif
-#define __PAGE_OFFSET 0x0UL
-#define PAGE_OFFSET 0x0UL
+struct vm_layout {
+ unsigned long kaslr_offset;
+ unsigned long kaslr_offset_phys;
+ unsigned long identity_base;
+ unsigned long identity_size;
+};
-#define __pa_nodebug(x) ((unsigned long)(x))
+extern struct vm_layout vm_layout;
+
+#define __kaslr_offset vm_layout.kaslr_offset
+#define __kaslr_offset_phys vm_layout.kaslr_offset_phys
+#define __identity_base vm_layout.identity_base
+#define ident_map_size vm_layout.identity_size
+
+static inline unsigned long kaslr_offset(void)
+{
+ return __kaslr_offset;
+}
+
+extern int __kaslr_enabled;
+static inline int kaslr_enabled(void)
+{
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return __kaslr_enabled;
+ return 0;
+}
+
+#define __PAGE_OFFSET __identity_base
+#define PAGE_OFFSET __PAGE_OFFSET
#ifdef __DECOMPRESSOR
+#define __pa_nodebug(x) ((unsigned long)(x))
#define __pa(x) __pa_nodebug(x)
#define __pa32(x) __pa(x)
#define __va(x) ((void *)(unsigned long)(x))
#else /* __DECOMPRESSOR */
+static inline unsigned long __pa_nodebug(unsigned long x)
+{
+ if (x < __kaslr_offset)
+ return x - __identity_base;
+ return x - __kaslr_offset + __kaslr_offset_phys;
+}
+
#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x, bool is_31bit);
@@ -206,7 +239,7 @@ static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)
#define __pa(x) __phys_addr((unsigned long)(x), false)
#define __pa32(x) __phys_addr((unsigned long)(x), true)
-#define __va(x) ((void *)(unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x) + __identity_base))
#endif /* __DECOMPRESSOR */
@@ -231,7 +264,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
-#define virt_addr_valid(kaddr) pfn_valid(phys_to_pfn(__pa_nodebug(kaddr)))
+#define virt_addr_valid(kaddr) pfn_valid(phys_to_pfn(__pa_nodebug((unsigned long)(kaddr))))
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
@@ -240,4 +273,11 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
+#define AMODE31_SIZE (3 * PAGE_SIZE)
+
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+#define __START_KERNEL 0x100000
+#define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE
+#define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE)
+
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 60950e7a25f5..ad55b5b99a75 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -107,6 +107,12 @@ static inline int is_module_addr(void *addr)
return 1;
}
+#ifdef CONFIG_RANDOMIZE_BASE
+#define KASLR_LEN (1UL << 31)
+#else
+#define KASLR_LEN 0UL
+#endif
+
/*
* A 64 bit pagetable entry of S390 has following format:
* | PFRA |0IPC| OS |
@@ -566,10 +572,20 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot)
}
/*
- * In the case that a guest uses storage keys
- * faults should no longer be backed by zero pages
+ * As soon as the guest uses storage keys or enables PV, we deduplicate all
+ * mapped shared zeropages and prevent new shared zeropages from getting
+ * mapped.
*/
-#define mm_forbids_zeropage mm_has_pgste
+#define mm_forbids_zeropage mm_forbids_zeropage
+static inline int mm_forbids_zeropage(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (!mm->context.allow_cow_sharing)
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_uses_skeys(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1405,6 +1421,7 @@ static inline unsigned long pud_deref(pud_t pud)
return (unsigned long)__va(pud_val(pud) & origin_mask);
}
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
return __pa(pud_deref(pud)) >> PAGE_SHIFT;
@@ -1768,8 +1785,10 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
static inline pmd_t pmdp_invalidate(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+ pmd_t pmd;
+ VM_WARN_ON_ONCE(!pmd_present(*pmdp));
+ pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
return pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
}
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index e747b067f8db..f45cfc8bc233 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -22,7 +22,6 @@ enum reserved_range_type {
RR_DECOMPRESSOR,
RR_INITRD,
RR_VMLINUX,
- RR_RELOC,
RR_AMODE31,
RR_IPLREPORT,
RR_CERT_COMP_LIST,
@@ -170,4 +169,7 @@ static inline unsigned long get_physmem_reserved(enum reserved_range_type type,
return *size;
}
+#define AMODE31_START (physmem_info.reserved[RR_AMODE31].start)
+#define AMODE31_END (physmem_info.reserved[RR_AMODE31].end)
+
#endif
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index bf15da0fedbc..0e3da500e98c 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -12,12 +12,12 @@
#define PREEMPT_NEED_RESCHED 0x80000000
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
int old, new;
@@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc)
old, new) != old);
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
__atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
/*
* With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
@@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val)
__atomic_add(val, &S390_lowcore.preempt_count);
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
__preempt_count_add(-val);
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
preempt_offset);
@@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset)
#define PREEMPT_ENABLED (0)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count);
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
S390_lowcore.preempt_count = pc;
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return false;
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
S390_lowcore.preempt_count += val;
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
S390_lowcore.preempt_count -= val;
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return !--S390_lowcore.preempt_count && tif_need_resched();
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
tif_need_resched());
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 03bcaa8effb2..32f70873e2b7 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -127,20 +127,6 @@ extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);
-extern unsigned long __kaslr_offset;
-static inline unsigned long kaslr_offset(void)
-{
- return __kaslr_offset;
-}
-
-extern int __kaslr_enabled;
-static inline int kaslr_enabled(void)
-{
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return __kaslr_enabled;
- return 0;
-}
-
struct oldmem_data {
unsigned long start;
unsigned long size;
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index db84942eb78f..7937765ccfa5 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -6,16 +6,13 @@
#define VDSO_HAS_CLOCK_GETRES 1
+#define VDSO_DELTA_NOMASK 1
+
#include <asm/syscall.h>
#include <asm/timex.h>
#include <asm/unistd.h>
#include <linux/compiler.h>
-#define vdso_calc_delta __arch_vdso_calc_delta
-static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
- return (cycles - last) * mult;
-}
static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
index fe17e448c0c5..561c91c1a87c 100644
--- a/arch/s390/include/asm/vtime.h
+++ b/arch/s390/include/asm/vtime.h
@@ -2,8 +2,6 @@
#ifndef _S390_VTIME_H
#define _S390_VTIME_H
-#define __ARCH_HAS_VTIME_TASK_SWITCH
-
static inline void update_timer_sys(void)
{
S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index fa029d0dc28f..db2d9ba5a86d 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -11,6 +11,8 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
# Do not trace early setup code
CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_stacktrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_unwind_bc.o = $(CC_FLAGS_FTRACE)
endif
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
index 554447768bdd..bf983513dd33 100644
--- a/arch/s390/kernel/cert_store.c
+++ b/arch/s390/kernel/cert_store.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
#include <crypto/sha2.h>
#include <keys/user-type.h>
#include <asm/debug.h>
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d09ebb6f5262..9863ebe75019 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -465,7 +465,11 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
ehdr->e_phoff = sizeof(Elf64_Ehdr);
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
ehdr->e_phentsize = sizeof(Elf64_Phdr);
- ehdr->e_phnum = mem_chunk_cnt + 1;
+ /*
+ * Number of memory chunk PT_LOAD program headers plus one kernel
+ * image PT_LOAD program header plus one PT_NOTE program header.
+ */
+ ehdr->e_phnum = mem_chunk_cnt + 1 + 1;
return ehdr + 1;
}
@@ -501,15 +505,16 @@ static int get_mem_chunk_cnt(void)
*/
static void loads_init(Elf64_Phdr *phdr)
{
+ unsigned long old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
phys_addr_t start, end;
u64 idx;
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
- phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = old_identity_base + start;
phdr->p_offset = start;
- phdr->p_vaddr = (unsigned long)__va(start);
phdr->p_paddr = start;
+ phdr->p_filesz = end - start;
phdr->p_memsz = end - start;
phdr->p_flags = PF_R | PF_W | PF_X;
phdr->p_align = PAGE_SIZE;
@@ -518,6 +523,25 @@ static void loads_init(Elf64_Phdr *phdr)
}
/*
+ * Prepare PT_LOAD type program header for kernel image region
+ */
+static void text_init(Elf64_Phdr *phdr)
+{
+ unsigned long start_phys = os_info_old_value(OS_INFO_IMAGE_PHYS);
+ unsigned long start = os_info_old_value(OS_INFO_IMAGE_START);
+ unsigned long end = os_info_old_value(OS_INFO_IMAGE_END);
+
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = start;
+ phdr->p_filesz = end - start;
+ phdr->p_memsz = end - start;
+ phdr->p_offset = start_phys;
+ phdr->p_paddr = start_phys;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
+/*
* Initialize notes (new kernel)
*/
static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
@@ -557,6 +581,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
size += nt_vmcoreinfo_size();
/* nt_final */
size += sizeof(Elf64_Nhdr);
+ /* PT_LOAD type program header for kernel text region */
+ size += sizeof(Elf64_Phdr);
/* PT_LOADS */
size += mem_chunk_cnt * sizeof(Elf64_Phdr);
@@ -568,7 +594,7 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
*/
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
- Elf64_Phdr *phdr_notes, *phdr_loads;
+ Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
size_t alloc_size;
int mem_chunk_cnt;
void *ptr, *hdr;
@@ -606,14 +632,19 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
/* Init program headers */
phdr_notes = ptr;
ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
+ phdr_text = ptr;
+ ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
phdr_loads = ptr;
ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
/* Init notes */
hdr_off = PTR_DIFF(ptr, hdr);
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+ /* Init kernel text program header */
+ text_init(phdr_text);
/* Init loads */
- hdr_off = PTR_DIFF(ptr, hdr);
loads_init(phdr_loads);
+ /* Finalize program headers */
+ hdr_off = PTR_DIFF(ptr, hdr);
*addr = (unsigned long long) hdr;
*size = (unsigned long long) hdr_off;
BUG_ON(elfcorehdr_size > alloc_size);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 787394978bc0..6a1e0fbbaa15 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler)
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
- lg %r12,__LC_GMAP
+ ltg %r12,__LC_GMAP
+ jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
@@ -635,6 +636,7 @@ SYM_DATA_START_LOCAL(daton_psw)
SYM_DATA_END(daton_psw)
.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
#include "asm/syscall_table.h"
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..ddf2ee47cb87 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/moduleloader.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/kprobes.h>
+#include <linux/execmem.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/text-patching.h>
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
{
const char *start, *end;
- ftrace_plt = module_alloc(PAGE_SIZE);
+ ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");
@@ -296,6 +296,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1486350a4177..f3609a0d453d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -20,6 +20,7 @@
#include <linux/gfp.h>
#include <linux/crash_dump.h>
#include <linux/debug_locks.h>
+#include <linux/vmalloc.h>
#include <asm/asm-extable.h>
#include <asm/diag.h>
#include <asm/ipl.h>
@@ -1209,8 +1210,8 @@ static struct attribute_group reipl_nss_attr_group = {
void set_os_info_reipl_block(void)
{
- os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
- reipl_block_actual->hdr.len);
+ os_info_entry_add_data(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block_actual->hdr.len);
}
/* reipl type */
@@ -1940,7 +1941,7 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
reipl_type == IPL_TYPE_NSS ||
reipl_type == IPL_TYPE_UNKNOWN)
os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
- os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
+ os_info_entry_add_data(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
csum = (__force unsigned int)cksm(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore();
abs_lc->ipib = __pa(reipl_block_actual);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 6f71b0ce1068..259496fe0ef9 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -29,6 +29,7 @@
#include <asm/hw_irq.h>
#include <asm/stacktrace.h>
#include <asm/softirq_stack.h>
+#include <asm/vtime.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@
#define pr_fmt(fmt) "kprobes: " fmt
-#include <linux/moduleloader.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
@@ -21,6 +20,7 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
+#include <linux/execmem.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
{
void *page;
- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..91e207b50394 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <linux/memory.h>
+#include <linux/execmem.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
@@ -36,47 +37,10 @@
#define PLT_ENTRY_SIZE 22
-static unsigned long get_module_load_offset(void)
-{
- static DEFINE_MUTEX(module_kaslr_mutex);
- static unsigned long module_load_offset;
-
- if (!kaslr_enabled())
- return 0;
- /*
- * Calculate the module_load_offset the first time this code
- * is called. Once calculated it stays the same until reboot.
- */
- mutex_lock(&module_kaslr_mutex);
- if (!module_load_offset)
- module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
- mutex_unlock(&module_kaslr_mutex);
- return module_load_offset;
-}
-
-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask, PAGE_KERNEL,
- VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
- NUMA_NO_NODE, __builtin_return_address(0));
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
- vfree(p);
- return NULL;
- }
- return p;
-}
-
#ifdef CONFIG_FUNCTION_TRACER
void module_arch_cleanup(struct module *mod)
{
- module_memfree(mod->arch.trampolines_start);
+ execmem_free(mod->arch.trampolines_start);
}
#endif
@@ -510,7 +474,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
- start = module_alloc(numpages * PAGE_SIZE);
+ start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
if (!start)
return -ENOMEM;
set_memory_rox((unsigned long)start, numpages);
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index c77382a67325..230d010bac9b 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -31,6 +31,7 @@
#include <asm/crw.h>
#include <asm/asm-offsets.h>
#include <asm/pai.h>
+#include <asm/vtime.h>
struct mcck_struct {
unsigned int kill_task : 1;
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index d1b16d83e49a..9b8c24ebb008 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -114,10 +114,10 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
type = BRASL_EXPOLINE; /* brasl instruction */
else
continue;
- thunk = instr + (*(int *)(instr + 2)) * 2;
+ thunk = instr + (long)(*(int *)(instr + 2)) * 2;
if (thunk[0] == 0xc6 && thunk[1] == 0x00)
/* exrl %r0,<target-br> */
- br = thunk + (*(int *)(thunk + 2)) * 2;
+ br = thunk + (long)(*(int *)(thunk + 2)) * 2;
else
continue;
if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index a801e6bd5341..b695f980bbde 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -15,8 +15,10 @@
#include <asm/checksum.h>
#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
+#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/asm-offsets.h>
+#include <asm/ipl.h>
/*
* OS info structure has to be page aligned
@@ -43,9 +45,9 @@ void os_info_crashkernel_add(unsigned long base, unsigned long size)
}
/*
- * Add OS info entry and update checksum
+ * Add OS info data entry and update checksum
*/
-void os_info_entry_add(int nr, void *ptr, u64 size)
+void os_info_entry_add_data(int nr, void *ptr, u64 size)
{
os_info.entry[nr].addr = __pa(ptr);
os_info.entry[nr].size = size;
@@ -54,15 +56,36 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
}
/*
+ * Add OS info value entry and update checksum
+ */
+void os_info_entry_add_val(int nr, u64 value)
+{
+ os_info.entry[nr].val = value;
+ os_info.entry[nr].size = 0;
+ os_info.entry[nr].csum = 0;
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
* Initialize OS info structure and set lowcore pointer
*/
void __init os_info_init(void)
{
struct lowcore *abs_lc;
+ BUILD_BUG_ON(sizeof(struct os_info) != PAGE_SIZE);
os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
+ os_info_entry_add_val(OS_INFO_IDENTITY_BASE, __identity_base);
+ os_info_entry_add_val(OS_INFO_KASLR_OFFSET, kaslr_offset());
+ os_info_entry_add_val(OS_INFO_KASLR_OFF_PHYS, __kaslr_offset_phys);
+ os_info_entry_add_val(OS_INFO_VMEMMAP, (unsigned long)vmemmap);
+ os_info_entry_add_val(OS_INFO_AMODE31_START, AMODE31_START);
+ os_info_entry_add_val(OS_INFO_AMODE31_END, AMODE31_END);
+ os_info_entry_add_val(OS_INFO_IMAGE_START, (unsigned long)_stext);
+ os_info_entry_add_val(OS_INFO_IMAGE_END, (unsigned long)_end);
+ os_info_entry_add_val(OS_INFO_IMAGE_PHYS, __pa_symbol(_stext));
os_info.csum = os_info_csum(&os_info);
abs_lc = get_abs_lowcore();
abs_lc->os_info = __pa(&os_info);
@@ -125,7 +148,7 @@ static void os_info_old_init(void)
if (os_info_init)
return;
- if (!oldmem_data.start)
+ if (!oldmem_data.start && !is_ipl_type_dump())
goto fail;
if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr)))
goto fail;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 41ed6e0f0a2a..1434642e9cba 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -428,7 +428,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset)
case CPUMF_CTR_SET_CRYPTO:
if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5)
ctrset_size = 16;
- else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7)
+ else if (cpumf_ctr_info.csvn >= 6)
ctrset_size = 20;
break;
case CPUMF_CTR_SET_EXT:
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 0d64aafd158f..e4a6bfc91080 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -855,16 +855,11 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
}
/* Determine version specific crypto set */
- switch (ci.csvn) {
- case 1 ... 5:
+ csvn = none;
+ if (ci.csvn >= 1 && ci.csvn <= 5)
csvn = cpumcf_svn_12345_pmu_event_attr;
- break;
- case 6 ... 7:
+ else if (ci.csvn >= 6)
csvn = cpumcf_svn_67_pmu_event_attr;
- break;
- default:
- csvn = none;
- }
/* Determine model-specific counter set(s) */
get_cpu_id(&cpu_id);
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 823d652e3917..4ad472d130a3 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event)
event->cpu);
struct paicrypt_map *cpump = mp->mapptr;
- cpump->event = NULL;
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
@@ -356,10 +355,15 @@ static int paicrypt_add(struct perf_event *event, int flags)
static void paicrypt_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paicrypt_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 616a25606cd6..a6da7e0cc7a6 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event)
free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
- cpump->event = NULL;
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
@@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags)
static void paiext_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paiext_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 24ed33f044ec..cbd5290939df 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -146,10 +146,10 @@ static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
unsigned long __bootdata_preserved(max_mappable);
-unsigned long __bootdata(ident_map_size);
struct physmem_info __bootdata(physmem_info);
-unsigned long __bootdata_preserved(__kaslr_offset);
+struct vm_layout __bootdata_preserved(vm_layout);
+EXPORT_SYMBOL_GPL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
@@ -765,7 +765,7 @@ static void __init relocate_amode31_section(void)
unsigned long amode31_size = __eamode31 - __samode31;
long amode31_offset, *ptr;
- amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
+ amode31_offset = AMODE31_START - (unsigned long)__samode31;
pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
/* Move original AMODE31 section to the new one */
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 94f440e38303..7c294da45bf5 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -101,3 +101,22 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
}
pagefault_enable();
}
+
+unsigned long return_address(unsigned int n)
+{
+ struct unwind_state state;
+ unsigned long addr;
+
+ /* Increment to skip current stack entry */
+ n++;
+
+ unwind_for_each_frame(&state, NULL, NULL, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ break;
+ if (!n--)
+ return addr;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile
index fb85e797946d..1bb78b9468e8 100644
--- a/arch/s390/kernel/syscalls/Makefile
+++ b/arch/s390/kernel/syscalls/Makefile
@@ -4,8 +4,8 @@ gen := arch/$(ARCH)/include/generated
kapi := $(gen)/asm
uapi := $(gen)/uapi/asm
-syscall := $(srctree)/$(src)/syscall.tbl
-systbl := $(srctree)/$(src)/syscalltbl
+syscall := $(src)/syscall.tbl
+systbl := $(src)/syscalltbl
gen-y := $(kapi)/syscall_table.h
kapi-hdrs-y := $(kapi)/unistd_nr.h
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index fc07bc39e698..265fea37e030 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -21,6 +21,7 @@
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
+EXPORT_SYMBOL(prot_virt_guest);
#endif
/*
@@ -181,36 +182,36 @@ int uv_convert_owned_from_secure(unsigned long paddr)
}
/*
- * Calculate the expected ref_count for a page that would otherwise have no
+ * Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
* the kernel, but with some slight modifications. We know that a secure
- * page can not be a huge page for example.
+ * folio can not be a large folio, for example.
*/
-static int expected_page_refs(struct page *page)
+static int expected_folio_refs(struct folio *folio)
{
int res;
- res = page_mapcount(page);
- if (PageSwapCache(page)) {
+ res = folio_mapcount(folio);
+ if (folio_test_swapcache(folio)) {
res++;
- } else if (page_mapping(page)) {
+ } else if (folio_mapping(folio)) {
res++;
- if (page_has_private(page))
+ if (folio->private)
res++;
}
return res;
}
-static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
+static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
- if (PageWriteback(page))
+ if (folio_test_writeback(folio))
return -EAGAIN;
- expected = expected_page_refs(page);
- if (!page_ref_freeze(page, expected))
+ expected = expected_folio_refs(folio);
+ if (!folio_ref_freeze(folio, expected))
return -EBUSY;
- set_bit(PG_arch_1, &page->flags);
+ set_bit(PG_arch_1, &folio->flags);
/*
* If the UVC does not succeed or fail immediately, we don't want to
* loop for long, or we might get stall notifications.
@@ -220,9 +221,9 @@ static int make_page_secure(struct page *page, struct uv_cb_header *uvcb)
* -EAGAIN and we let the callers deal with it.
*/
cc = __uv_call(0, (u64)uvcb);
- page_ref_unfreeze(page, expected);
+ folio_ref_unfreeze(folio, expected);
/*
- * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
+ * Return -ENXIO if the folio was not mapped, -EINVAL for other errors.
* If busy or partially completed, return -EAGAIN.
*/
if (cc == UVC_CC_OK)
@@ -277,7 +278,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
bool local_drain = false;
spinlock_t *ptelock;
unsigned long uaddr;
- struct page *page;
+ struct folio *folio;
pte_t *ptep;
int rc;
@@ -306,15 +307,19 @@ again:
if (!ptep)
goto out;
if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- page = pte_page(*ptep);
+ folio = page_folio(pte_page(*ptep));
+ rc = -EINVAL;
+ if (folio_test_large(folio))
+ goto unlock;
rc = -EAGAIN;
- if (trylock_page(page)) {
+ if (folio_trylock(folio)) {
if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(page_to_phys(page));
- rc = make_page_secure(page, uvcb);
- unlock_page(page);
+ uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
+ rc = make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
}
}
+unlock:
pte_unmap_unlock(ptep, ptelock);
out:
mmap_read_unlock(gmap->mm);
@@ -324,10 +329,10 @@ out:
* If we are here because the UVC returned busy or partial
* completion, this is just a useless check, but it is safe.
*/
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
} else if (rc == -EBUSY) {
/*
- * If we have tried a local drain and the page refcount
+ * If we have tried a local drain and the folio refcount
* still does not match our expected safe value, try with a
* system wide drain. This is needed if the pagevecs holding
* the page are on a different CPU.
@@ -338,7 +343,7 @@ out:
return -EAGAIN;
}
/*
- * We are here if the page refcount does not match the
+ * We are here if the folio refcount does not match the
* expected safe value. The main culprits are usually
* pagevecs. With lru_add_drain() we drain the pagevecs
* on the local CPU so that hopefully the refcount will
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index b12a274cbb47..df928fee26b5 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso
-KCOV_INSTRUMENT := n
-
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso32 = vdso_user_wrapper-32.o note-32.o
@@ -32,19 +30,13 @@ obj-y += vdso32_wrapper.o
targets += vdso32.lds
CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
# Force dependency (incbin is bad)
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
quiet_cmd_vdso_and_check = VDSO $@
cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
+$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) FORCE
$(call if_changed,vdso_and_check)
# strip rule for the .so file
@@ -62,7 +54,7 @@ quiet_cmd_vdso32cc = VDSO32C $@
cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $<
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index ef9832726097..6da1b9ad8ab0 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# List of files in the vdso
-KCOV_INSTRUMENT := n
-
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso64 = vdso_user_wrapper.o note.o
@@ -37,12 +35,6 @@ obj-y += vdso64_wrapper.o
targets += vdso64.lds
CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
# Force dependency (incbin is bad)
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
@@ -50,7 +42,7 @@ quiet_cmd_vdso_and_check = VDSO $@
cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check)
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
$(call if_changed,vdso_and_check)
# strip rule for the .so file
@@ -72,7 +64,7 @@ quiet_cmd_vdso64cc = VDSO64C $@
cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
# Generate VDSO offsets using helper script
-gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
+gen-vdsosym := $(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index 57f62596e53b..85247ef5a41b 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -24,8 +24,10 @@ __kernel_\func:
CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
stg %r14,STACK_FRAME_OVERHEAD(%r15)
+ CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD
brasl %r14,__s390_vdso_\func
lg %r14,STACK_FRAME_OVERHEAD(%r15)
+ CFI_RESTORE 14
aghi %r15,WRAPPER_FRAME_SIZE
CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
index d296dfc22191..23f7d7619a99 100644
--- a/arch/s390/kernel/vmcore_info.c
+++ b/arch/s390/kernel/vmcore_info.c
@@ -14,7 +14,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
+ vmcoreinfo_append_str("IDENTITYBASE=%lx\n", __identity_base);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ vmcoreinfo_append_str("KERNELOFFPHYS=%lx\n", __kaslr_offset_phys);
abs_lc = get_abs_lowcore();
abs_lc->vmcore_info = paddr_vmcoreinfo_note();
put_abs_lowcore(abs_lc);
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 48de296e8905..a1ce3925ec71 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -39,7 +39,7 @@ PHDRS {
SECTIONS
{
- . = 0x100000;
+ . = __START_KERNEL;
.text : {
_stext = .; /* Start of text section */
_text = .; /* Text and read-only data */
@@ -183,7 +183,7 @@ SECTIONS
.amode31.data : {
*(.amode31.data)
}
- . = ALIGN(PAGE_SIZE);
+ . = _samode31 + AMODE31_SIZE;
_eamode31 = .;
/* early.c uses stsi, which requires page aligned data. */
@@ -192,31 +192,6 @@ SECTIONS
PERCPU_SECTION(0x100)
-#ifdef CONFIG_PIE_BUILD
- .dynsym ALIGN(8) : {
- __dynsym_start = .;
- *(.dynsym)
- __dynsym_end = .;
- }
- .rela.dyn ALIGN(8) : {
- __rela_dyn_start = .;
- *(.rela*)
- __rela_dyn_end = .;
- }
- .dynamic ALIGN(8) : {
- *(.dynamic)
- }
- .dynstr ALIGN(8) : {
- *(.dynstr)
- }
-#endif
- .hash ALIGN(8) : {
- *(.hash)
- }
- .gnu.hash ALIGN(8) : {
- *(.gnu.hash)
- }
-
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
@@ -230,7 +205,6 @@ SECTIONS
* it should match struct vmlinux_info
*/
.vmlinux.info 0 (INFO) : {
- QUAD(_stext) /* default_lma */
QUAD(startup_continue) /* entry */
QUAD(__bss_start - _stext) /* image_size */
QUAD(__bss_stop - __bss_start) /* bss_size */
@@ -239,14 +213,8 @@ SECTIONS
QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */
QUAD(__boot_data_preserved_end -
__boot_data_preserved_start) /* bootdata_preserved_size */
-#ifdef CONFIG_PIE_BUILD
- QUAD(__dynsym_start) /* dynsym_start */
- QUAD(__rela_dyn_start) /* rela_dyn_start */
- QUAD(__rela_dyn_end) /* rela_dyn_end */
-#else
QUAD(__got_start) /* got_start */
QUAD(__got_end) /* got_end */
-#endif
QUAD(_eamode31 - _samode31) /* amode31_size */
QUAD(init_mm)
QUAD(swapper_pg_dir)
@@ -282,12 +250,10 @@ SECTIONS
*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)
}
ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")
-#ifndef CONFIG_PIE_BUILD
.rela.dyn : {
*(.rela.*) *(.rela_*)
}
ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
-#endif
/* Sections to be discarded */
DISCARDS
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 5147b943a864..82e9631cd9ef 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -587,7 +587,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_S390_HPAGE_1M:
r = 0;
- if (hpage && !kvm_is_ucontrol(kvm))
+ if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
r = 1;
break;
case KVM_CAP_S390_MEM_OP:
@@ -2631,9 +2631,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
if (r)
break;
- mmap_write_lock(current->mm);
- r = gmap_mark_unmergeable();
- mmap_write_unlock(current->mm);
+ r = s390_disable_cow_sharing();
if (r)
break;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index b2c9f010f0fe..c9ecae830634 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -12,6 +12,7 @@
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
+#include <linux/io.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
@@ -361,7 +362,7 @@ end:
case -EACCES:
return set_validity_icpt(scb_s, 0x003CU);
}
- scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
+ scb_s->crycbd = (u32)virt_to_phys(&vsie_page->crycb) | CRYCB_FORMAT2;
return 0;
}
@@ -1005,7 +1006,7 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (read_guest_real(vcpu, fac, &vsie_page->fac,
stfle_size() * sizeof(u64)))
return set_validity_icpt(scb_s, 0x1090U);
- scb_s->fac = (__u32)(__u64) &vsie_page->fac;
+ scb_s->fac = (u32)virt_to_phys(&vsie_page->fac);
}
return 0;
}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 90eac15ea62a..f43f897d3fc0 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -23,4 +23,4 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
-obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/
+obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
diff --git a/arch/s390/lib/expoline/expoline.S b/arch/s390/lib/expoline.S
index 92ed8409a7a4..92ed8409a7a4 100644
--- a/arch/s390/lib/expoline/expoline.S
+++ b/arch/s390/lib/expoline.S
diff --git a/arch/s390/lib/expoline/Makefile b/arch/s390/lib/expoline/Makefile
deleted file mode 100644
index 854631d9cb03..000000000000
--- a/arch/s390/lib/expoline/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-y += expoline.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index c421dd44ffbe..65747f15dbec 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
gmap = (struct gmap *)S390_lowcore.gmap;
- if (regs->cr1 == gmap->asce)
+ if (gmap && gmap->asce == regs->cr1)
return GMAP_FAULT;
return KERNEL_FAULT;
}
@@ -325,7 +325,8 @@ static void do_exception(struct pt_regs *regs, int access)
goto lock_mmap;
if (!(vma->vm_flags & access)) {
vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return handle_fault_error_nolock(regs, SEGV_ACCERR);
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 094b43b121cd..474a25ca5c48 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2550,41 +2550,6 @@ static inline void thp_split_mm(struct mm_struct *mm)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * Remove all empty zero pages from the mapping for lazy refaulting
- * - This must be called after mm->context.has_pgste is set, to avoid
- * future creation of zero pages
- * - This must be called after THP was disabled.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * racing with the loop below and so causing pte_offset_map_lock() to fail,
- * it will never insert a page table containing empty zero pages once
- * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
- */
-static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long addr;
-
- for (addr = start; addr != end; addr += PAGE_SIZE) {
- pte_t *ptep;
- spinlock_t *ptl;
-
- ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
- if (!ptep)
- break;
- if (is_zero_pfn(pte_pfn(*ptep)))
- ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
- pte_unmap_unlock(ptep, ptl);
- }
- return 0;
-}
-
-static const struct mm_walk_ops zap_zero_walk_ops = {
- .pmd_entry = __zap_zero_pages,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2601,22 +2566,142 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
mmap_write_unlock(mm);
return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-int gmap_mark_unmergeable(void)
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+ /* Return 1 of the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/*
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __s390_unshare_zeropages(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+static int __s390_disable_cow_sharing(struct mm_struct *mm)
{
+ int rc;
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __s390_unshare_zeropages(mm);
/*
* Make sure to disable KSM (if enabled for the whole process or
* individual VMAs). Note that nothing currently hinders user space
* from re-enabling it.
*/
- return ksm_disable(current->mm);
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+
+/*
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int s390_disable_cow_sharing(void)
+{
+ int rc;
+
+ mmap_write_lock(current->mm);
+ rc = __s390_disable_cow_sharing(current->mm);
+ mmap_write_unlock(current->mm);
+ return rc;
}
-EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
/*
* Enable storage key handling from now on and initialize the storage
@@ -2661,7 +2746,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
return 0;
start = pmd_val(*pmd) & HPAGE_MASK;
- end = start + HPAGE_SIZE - 1;
+ end = start + HPAGE_SIZE;
__storage_key_init_range(start, end);
set_bit(PG_arch_1, &page->flags);
cond_resched();
@@ -2685,7 +2770,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = gmap_mark_unmergeable();
+ rc = __s390_disable_cow_sharing(mm);
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index c2e8242bd15d..2675aab4acc7 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -139,7 +139,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
}
if (!test_and_set_bit(PG_arch_1, &page->flags))
- __storage_key_init_range(paddr, paddr + size - 1);
+ __storage_key_init_range(paddr, paddr + size);
}
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -233,16 +233,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return (pte_t *) pmdp;
}
-int pmd_huge(pmd_t pmd)
-{
- return pmd_leaf(pmd);
-}
-
-int pud_huge(pud_t pud)
-{
- return pud_leaf(pud);
-}
-
bool __init arch_hugetlb_valid_size(unsigned long size)
{
if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
@@ -258,14 +248,12 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
- info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
info.high_limit = TASK_SIZE;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
return vm_unmapped_area(&info);
}
@@ -274,7 +262,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
unsigned long addr;
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -282,7 +270,6 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
info.low_limit = PAGE_SIZE;
info.high_limit = current->mm->mmap_base;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
@@ -328,7 +315,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
goto check_asce_limit;
}
- if (mm->get_unmapped_area == arch_get_unmapped_area)
+ if (!test_bit(MMF_TOPDOWN, &mm->flags))
addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
else
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f6391442c0c2..e769d2726f4e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -49,6 +49,7 @@
#include <asm/uv.h>
#include <linux/virtio_anchor.h>
#include <linux/virtio_config.h>
+#include <linux/execmem.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
@@ -302,3 +303,32 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
vmem_remove_mapping(start, size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ unsigned long module_load_offset = 0;
+ unsigned long start;
+
+ if (kaslr_enabled())
+ module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
+
+ start = MODULES_VADDR + module_load_offset;
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .flags = EXECMEM_KASAN_SHADOW,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = MODULE_ALIGN,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index b14fc0887654..206756946589 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -86,7 +86,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -102,7 +102,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
goto check_asce_limit;
}
- info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
@@ -122,7 +121,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -185,10 +184,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
*/
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor);
- mm->get_unmapped_area = arch_get_unmapped_area;
+ clear_bit(MMF_TOPDOWN, &mm->flags);
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ set_bit(MMF_TOPDOWN, &mm->flags);
}
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 85cddf904cb2..41c714e21292 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -13,7 +13,9 @@
#include <linux/slab.h>
#include <linux/sort.h>
#include <asm/page-states.h>
+#include <asm/abs_lowcore.h>
#include <asm/cacheflush.h>
+#include <asm/maccess.h>
#include <asm/nospec-branch.h>
#include <asm/ctlreg.h>
#include <asm/pgalloc.h>
@@ -21,6 +23,7 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>
+#include <asm/physmem_info.h>
static DEFINE_MUTEX(vmem_mutex);
@@ -436,7 +439,7 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
return -EINVAL;
/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
- if (WARN_ON_ONCE(end > VMALLOC_START))
+ if (WARN_ON_ONCE(end > __abs_lowcore))
return -EINVAL;
for (addr = start; addr < end; addr = next) {
next = pgd_addr_end(addr, end);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index b418333bb086..4be8f5cadd02 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size)
* PLT for hotpatchable calls. The calling convention is the same as for the
* ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
*/
-extern const char bpf_plt[];
-extern const char bpf_plt_ret[];
-extern const char bpf_plt_target[];
-extern const char bpf_plt_end[];
-#define BPF_PLT_SIZE 32
+struct bpf_plt {
+ char code[16];
+ void *ret;
+ void *target;
+} __packed;
+extern const struct bpf_plt bpf_plt;
asm(
".pushsection .rodata\n"
" .balign 8\n"
@@ -531,15 +532,14 @@ asm(
" .balign 8\n"
"bpf_plt_ret: .quad 0\n"
"bpf_plt_target: .quad 0\n"
- "bpf_plt_end:\n"
" .popsection\n"
);
-static void bpf_jit_plt(void *plt, void *ret, void *target)
+static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
- memcpy(plt, bpf_plt, BPF_PLT_SIZE);
- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
+ memcpy(plt, &bpf_plt, sizeof(*plt));
+ plt->ret = ret;
+ plt->target = target;
}
/*
@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
if (jit->prg_buf)
- bpf_jit_plt(jit->prg_buf + jit->prg,
+ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
jit->prg_buf + jit->prologue_plt_ret, NULL);
- jit->prg += BPF_PLT_SIZE;
+ jit->prg += sizeof(struct bpf_plt);
}
static int get_probe_mem_regno(const u8 *insn)
@@ -1427,8 +1427,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \
(insn->imm & BPF_FETCH) ? src_reg : REG_W0, \
src_reg, dst_reg, off); \
- if (is32 && (insn->imm & BPF_FETCH)) \
- EMIT_ZERO(src_reg); \
+ if (insn->imm & BPF_FETCH) { \
+ /* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \
+ _EMIT2(0x07e0); \
+ if (is32) \
+ EMIT_ZERO(src_reg); \
+ } \
} while (0)
case BPF_ADD:
case BPF_ADD | BPF_FETCH:
@@ -2040,9 +2044,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
- return orig_fp;
-
if (!fp->jit_requested)
return orig_fp;
@@ -2111,7 +2112,11 @@ skip_init_ctx:
print_fn_code(jit.prg_buf, jit.size_prg);
}
if (!fp->is_func || extra_pass) {
- bpf_jit_binary_lock_ro(header);
+ if (bpf_jit_binary_lock_ro(header)) {
+ bpf_jit_binary_free(header);
+ fp = orig_fp;
+ goto free_addrs;
+ }
} else {
jit_data->header = header;
jit_data->ctx = jit;
@@ -2148,14 +2153,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
{
+ struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
u16 opc;
s32 disp;
} __packed insn;
- char expected_plt[BPF_PLT_SIZE];
- char current_plt[BPF_PLT_SIZE];
- char new_plt[BPF_PLT_SIZE];
- char *plt;
char *ret;
int err;
@@ -2174,18 +2176,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
*/
} else {
/* Verify the PLT. */
- plt = (char *)ip + (insn.disp << 1);
- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ plt = ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(&current_plt, plt,
+ sizeof(current_plt));
if (err < 0)
return err;
ret = (char *)ip + 6;
- bpf_jit_plt(expected_plt, ret, old_addr);
- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ bpf_jit_plt(&expected_plt, ret, old_addr);
+ if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
return -EINVAL;
/* Adjust the call address. */
- bpf_jit_plt(new_plt, ret, new_addr);
- s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
- new_plt + (bpf_plt_target - bpf_plt),
+ bpf_jit_plt(&new_plt, ret, new_addr);
+ s390_kernel_write(&plt->target, &new_plt.target,
sizeof(void *));
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 26afde0d1ed3..0de0f6e405b5 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -250,12 +250,6 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
return 0;
}
-/* combine single writes by using store-block insn */
-void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
-{
- zpci_memcpy_toio(to, from, count * 8);
-}
-
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
unsigned long prot)
{
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index a90499c087f0..5398729bfe1b 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -169,7 +169,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+ ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
if (ret)
goto out_unlock_mmap;
@@ -308,7 +308,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+ ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
if (ret)
goto out_unlock_mmap;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index a0b872b74fe3..0f4f1e8fc480 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -172,7 +172,6 @@ static ssize_t uid_is_unique_show(struct device *dev,
}
static DEVICE_ATTR_RO(uid_is_unique);
-#ifndef CONFIG_DMI
/* analogous to smbios index */
static ssize_t index_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -202,7 +201,6 @@ static struct attribute_group zpci_ident_attr_group = {
.attrs = zpci_ident_attrs,
.is_visible = zpci_index_is_visible,
};
-#endif
static struct bin_attribute *zpci_bin_attrs[] = {
&bin_attr_util_string,
@@ -245,8 +243,6 @@ static struct attribute_group pfip_attr_group = {
const struct attribute_group *zpci_attr_groups[] = {
&zpci_attr_group,
&pfip_attr_group,
-#ifndef CONFIG_DMI
&zpci_ident_attr_group,
-#endif
NULL,
};
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 4e930f566878..24eccaa29337 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-OBJECT_FILES_NON_STANDARD := y
-
purgatory-y := head.o purgatory.o string.o sha256.o mem.o
targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro
@@ -15,12 +13,6 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-KCOV_INSTRUMENT := n
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c
index 30a732c808f3..a74dbd5c9896 100644
--- a/arch/s390/tools/relocs.c
+++ b/arch/s390/tools/relocs.c
@@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel)
case R_390_GOTOFF64:
break;
case R_390_64:
- add_reloc(&relocs64, offset);
+ add_reloc(&relocs64, offset - ehdr.e_entry);
break;
default:
die("Unsupported relocation type: %d\n", r_type);