summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/boot/vmem.c15
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--arch/s390/crypto/paes_s390.c9
-rw-r--r--arch/s390/include/asm/asm-prototypes.h4
-rw-r--r--arch/s390/include/asm/cmpxchg.h32
-rw-r--r--arch/s390/include/asm/cpacf.h7
-rw-r--r--arch/s390/include/asm/cpu_mf.h2
-rw-r--r--arch/s390/include/asm/os_info.h7
-rw-r--r--arch/s390/include/asm/percpu.h34
-rw-r--r--arch/s390/include/asm/pgtable.h3
-rw-r--r--arch/s390/include/asm/physmem_info.h5
-rw-r--r--arch/s390/include/asm/pkey.h4
-rw-r--r--arch/s390/include/asm/thread_info.h3
-rw-r--r--arch/s390/include/asm/timex.h13
-rw-r--r--arch/s390/include/uapi/asm/pkey.h15
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/ipl.c16
-rw-r--r--arch/s390/kernel/module.c3
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c452
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c16
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c19
-rw-r--r--arch/s390/kernel/perf_pai_ext.c23
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/time.c5
-rw-r--r--arch/s390/kernel/uv.c2
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/tishift.S63
-rw-r--r--arch/s390/mm/fault.c5
-rw-r--r--arch/s390/mm/gmap.c31
-rw-r--r--arch/s390/mm/pageattr.c1
-rw-r--r--arch/s390/mm/pgtable.c12
-rw-r--r--arch/s390/mm/vmem.c14
-rw-r--r--arch/s390/purgatory/Makefile2
37 files changed, 579 insertions, 250 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6dab9c1be508..5b39918b7042 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -117,6 +117,7 @@ config S390
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
select ARCH_SUPPORTS_HUGETLBFS
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index acb1f8b53105..c67f59db7a51 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -45,6 +45,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
static pte_t pte_z;
+static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
+{
+ start = PAGE_ALIGN_DOWN(__sha(start));
+ end = PAGE_ALIGN(__sha(end));
+ pgtable_populate(start, end, mode);
+}
+
static void kasan_populate_shadow(void)
{
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
@@ -95,17 +102,17 @@ static void kasan_populate_shadow(void)
*/
for_each_physmem_usable_range(i, &start, &end)
- pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
- pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW);
+ kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
} else {
untracked_end = MODULES_VADDR;
}
/* populate kasan shadow for untracked memory */
- pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW);
- pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index be3bf03bf361..aa95cf6dfabb 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -116,6 +116,7 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 769c7eed8b6a..f041945f9148 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -107,6 +107,7 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 29dc827e0fe8..d29a9d908797 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,7 +5,7 @@
* s390 implementation of the AES Cipher Algorithm with protected keys.
*
* s390 Version:
- * Copyright IBM Corp. 2017,2020
+ * Copyright IBM Corp. 2017, 2023
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
*/
@@ -132,7 +132,8 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb,
if (i > 0 && ret == -EAGAIN && in_task())
if (msleep_interruptible(1000))
return -EINTR;
- ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk);
+ ret = pkey_keyblob2pkey(kb->key, kb->keylen,
+ pk->protkey, &pk->len, &pk->type);
if (ret == 0)
break;
}
@@ -145,6 +146,7 @@ static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
int ret;
struct pkey_protkey pkey;
+ pkey.len = sizeof(pkey.protkey);
ret = __paes_keyblob2pkey(&ctx->kb, &pkey);
if (ret)
return ret;
@@ -414,6 +416,9 @@ static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
{
struct pkey_protkey pkey0, pkey1;
+ pkey0.len = sizeof(pkey0.protkey);
+ pkey1.len = sizeof(pkey1.protkey);
+
if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) ||
__paes_keyblob2pkey(&ctx->kb[1], &pkey1))
return -EINVAL;
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
index c37eb921bfbf..a873e873e1ee 100644
--- a/arch/s390/include/asm/asm-prototypes.h
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -6,4 +6,8 @@
#include <asm/fpu/api.h>
#include <asm-generic/asm-prototypes.h>
+__int128_t __ashlti3(__int128_t a, int b);
+__int128_t __ashrti3(__int128_t a, int b);
+__int128_t __lshrti3(__int128_t a, int b);
+
#endif /* _ASM_S390_PROTOTYPES_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 06e0e42f4eec..aae0315374de 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -190,38 +190,18 @@ static __always_inline unsigned long __cmpxchg(unsigned long address,
#define arch_cmpxchg_local arch_cmpxchg
#define arch_cmpxchg64_local arch_cmpxchg
-#define system_has_cmpxchg_double() 1
+#define system_has_cmpxchg128() 1
-static __always_inline int __cmpxchg_double(unsigned long p1, unsigned long p2,
- unsigned long o1, unsigned long o2,
- unsigned long n1, unsigned long n2)
+static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
{
- union register_pair old = { .even = o1, .odd = o2, };
- union register_pair new = { .even = n1, .odd = n2, };
- int cc;
-
asm volatile(
" cdsg %[old],%[new],%[ptr]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=&d" (cc), [old] "+&d" (old.pair)
- : [new] "d" (new.pair),
- [ptr] "QS" (*(unsigned long *)p1), "Q" (*(unsigned long *)p2)
+ : [old] "+d" (old), [ptr] "+QS" (*ptr)
+ : [new] "d" (new)
: "memory", "cc");
- return !cc;
+ return old;
}
-#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \
-({ \
- typeof(p1) __p1 = (p1); \
- typeof(p2) __p2 = (p2); \
- \
- BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
- BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
- VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
- __cmpxchg_double((unsigned long)__p1, (unsigned long)__p2, \
- (unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2)); \
-})
+#define arch_cmpxchg128 arch_cmpxchg128
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 646b12981f20..b378e2b57ad8 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -2,7 +2,7 @@
/*
* CP Assist for Cryptographic Functions (CPACF)
*
- * Copyright IBM Corp. 2003, 2017
+ * Copyright IBM Corp. 2003, 2023
* Author(s): Thomas Spatzier
* Jan Glauber
* Harald Freudenberger (freude@de.ibm.com)
@@ -132,6 +132,11 @@
#define CPACF_PCKMO_ENC_AES_128_KEY 0x12
#define CPACF_PCKMO_ENC_AES_192_KEY 0x13
#define CPACF_PCKMO_ENC_AES_256_KEY 0x14
+#define CPACF_PCKMO_ENC_ECC_P256_KEY 0x20
+#define CPACF_PCKMO_ENC_ECC_P384_KEY 0x21
+#define CPACF_PCKMO_ENC_ECC_P521_KEY 0x22
+#define CPACF_PCKMO_ENC_ECC_ED25519_KEY 0x28
+#define CPACF_PCKMO_ENC_ECC_ED448_KEY 0x29
/*
* Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 7e417d7de568..a0de5b9b02ea 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -140,7 +140,7 @@ union hws_trailer_header {
unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
unsigned long long overflow; /* 64 - Overflow Count */
};
- __uint128_t val;
+ u128 val;
};
struct hws_trailer_entry {
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 0d1c74a7a650..a4d2e103f116 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -16,6 +16,9 @@
#define OS_INFO_VMCOREINFO 0
#define OS_INFO_REIPL_BLOCK 1
+#define OS_INFO_FLAGS_ENTRY 2
+
+#define OS_INFO_FLAG_REIPL_CLEAR (1UL << 0)
struct os_info_entry {
u64 addr;
@@ -30,8 +33,8 @@ struct os_info {
u16 version_minor;
u64 crashkernel_addr;
u64 crashkernel_size;
- struct os_info_entry entry[2];
- u8 reserved[4024];
+ struct os_info_entry entry[3];
+ u8 reserved[4004];
} __packed;
void os_info_init(void);
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 081837b391e3..264095dd84bc 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -148,6 +148,22 @@
#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, oval, nval) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ u128 old__, new__, ret__; \
+ pcp_op_T__ *ptr__; \
+ old__ = oval; \
+ new__ = nval; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = cmpxchg128((void *)ptr__, old__, new__); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
#define arch_this_cpu_xchg(pcp, nval) \
({ \
typeof(pcp) *ptr__; \
@@ -164,24 +180,6 @@
#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
-#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \
-({ \
- typeof(pcp1) *p1__; \
- typeof(pcp2) *p2__; \
- int ret__; \
- \
- preempt_disable_notrace(); \
- p1__ = raw_cpu_ptr(&(pcp1)); \
- p2__ = raw_cpu_ptr(&(pcp2)); \
- ret__ = __cmpxchg_double((unsigned long)p1__, (unsigned long)p2__, \
- (unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2)); \
- preempt_enable_notrace(); \
- ret__; \
-})
-
-#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
-
#include <asm-generic/percpu.h>
#endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6822a11c2c8a..c55f3c3365af 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count)
atomic_long_add(count, &direct_pages_count[level]);
}
-struct seq_file;
-void arch_report_meminfo(struct seq_file *m);
-
/*
* The S390 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information.
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 8e9c582592b3..9e41a74fce9a 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -3,6 +3,7 @@
#define _ASM_S390_MEM_DETECT_H
#include <linux/types.h>
+#include <asm/page.h>
enum physmem_info_source {
MEM_DETECT_NONE = 0,
@@ -133,7 +134,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t)
#define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \
for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \
- range && range->end; range = range->chain, \
+ range && range->end; range = range->chain ? __va(range->chain) : NULL, \
*p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t,
@@ -145,7 +146,7 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range
return range;
}
if (range->chain)
- return range->chain;
+ return __va(range->chain);
while (++*t < RR_MAX) {
range = &physmem_info.reserved[*t];
if (range->end)
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index dd3d20c332ac..47d80a7451a6 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -2,7 +2,7 @@
/*
* Kernelspace interface to the pkey device driver
*
- * Copyright IBM Corp. 2016,2019
+ * Copyright IBM Corp. 2016, 2023
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -23,6 +23,6 @@
* @return 0 on success, negative errno value on failure
*/
int pkey_keyblob2pkey(const u8 *key, u32 keylen,
- struct pkey_protkey *protkey);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype);
#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c7c97921ed8d..a674c7d25da5 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -52,9 +52,6 @@ struct thread_info {
struct task_struct;
-void arch_release_task_struct(struct task_struct *tsk);
-int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-
void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index ce878e85b6e4..4d646659a5f5 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -63,7 +63,7 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk)
return cc;
}
-static inline void store_tod_clock_ext(union tod_clock *tod)
+static __always_inline void store_tod_clock_ext(union tod_clock *tod)
{
asm volatile("stcke %0" : "=Q" (*tod) : : "cc");
}
@@ -177,7 +177,7 @@ static inline void local_tick_enable(unsigned long comp)
typedef unsigned long cycles_t;
-static inline unsigned long get_tod_clock(void)
+static __always_inline unsigned long get_tod_clock(void)
{
union tod_clock clk;
@@ -204,6 +204,11 @@ void init_cpu_timer(void);
extern union tod_clock tod_clock_base;
+static __always_inline unsigned long __get_tod_clock_monotonic(void)
+{
+ return get_tod_clock() - tod_clock_base.tod;
+}
+
/**
* get_clock_monotonic - returns current time in clock rate units
*
@@ -216,7 +221,7 @@ static inline unsigned long get_tod_clock_monotonic(void)
unsigned long tod;
preempt_disable_notrace();
- tod = get_tod_clock() - tod_clock_base.tod;
+ tod = __get_tod_clock_monotonic();
preempt_enable_notrace();
return tod;
}
@@ -240,7 +245,7 @@ static inline unsigned long get_tod_clock_monotonic(void)
* -> ns = (th * 125) + ((tl * 125) >> 9);
*
*/
-static inline unsigned long tod_to_ns(unsigned long todval)
+static __always_inline unsigned long tod_to_ns(unsigned long todval)
{
return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 924b876f992c..f7bae1c63bd6 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -2,7 +2,7 @@
/*
* Userspace interface to the pkey device driver
*
- * Copyright IBM Corp. 2017, 2019
+ * Copyright IBM Corp. 2017, 2023
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -32,10 +32,15 @@
#define MINKEYBLOBSIZE SECKEYBLOBSIZE
/* defines for the type field within the pkey_protkey struct */
-#define PKEY_KEYTYPE_AES_128 1
-#define PKEY_KEYTYPE_AES_192 2
-#define PKEY_KEYTYPE_AES_256 3
-#define PKEY_KEYTYPE_ECC 4
+#define PKEY_KEYTYPE_AES_128 1
+#define PKEY_KEYTYPE_AES_192 2
+#define PKEY_KEYTYPE_AES_256 3
+#define PKEY_KEYTYPE_ECC 4
+#define PKEY_KEYTYPE_ECC_P256 5
+#define PKEY_KEYTYPE_ECC_P384 6
+#define PKEY_KEYTYPE_ECC_P521 7
+#define PKEY_KEYTYPE_ECC_ED25519 8
+#define PKEY_KEYTYPE_ECC_ED448 9
/* the newer ioctls use a pkey_key_type enum for type information */
enum pkey_key_type {
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 8a617be28bb4..7af69948b290 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads;
+ size_t alloc_size;
int mem_chunk_cnt;
void *ptr, *hdr;
- u32 alloc_size;
u64 hdr_off;
/* If we are not in kdump or zfcp/nvme dump mode return */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 34674e38826b..9f41853f36b9 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -34,14 +34,12 @@ void kernel_stack_overflow(struct pt_regs * regs);
void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs);
-void __init init_IRQ(void);
void do_io_irq(struct pt_regs *regs);
void do_ext_irq(struct pt_regs *regs);
void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
-void __init time_init(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f44f70de9661..85a00d97a314 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -176,6 +176,8 @@ static bool reipl_fcp_clear;
static bool reipl_ccw_clear;
static bool reipl_eckd_clear;
+static unsigned long os_info_flags;
+
static inline int __diag308(unsigned long subcode, unsigned long addr)
{
union register_pair r1;
@@ -1938,6 +1940,20 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
struct lowcore *abs_lc;
unsigned int csum;
+ /*
+ * Set REIPL_CLEAR flag in os_info flags entry indicating
+ * 'clear' sysfs attribute has been set on the panicked system
+ * for specified reipl type.
+ * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+ */
+ if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+ (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+ (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+ (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+ reipl_type == IPL_TYPE_NSS ||
+ reipl_type == IPL_TYPE_UNKNOWN)
+ os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+ os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore();
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index f1b35dcdf3eb..42215f9404af 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -352,7 +352,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
- val += (Elf_Addr) me->mem[MOD_TEXT].base - loc;
+ val += (Elf_Addr)me->mem[MOD_TEXT].base +
+ me->arch.got_offset - loc;
rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index cf1b6e8a708d..90679143534b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -76,6 +76,7 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
}
struct cpu_cf_events {
+ refcount_t refcnt; /* Reference count */
atomic_t ctr_set[CPUMF_CTR_SET_MAX];
u64 state; /* For perf_event_open SVC */
u64 dev_state; /* For /dev/hwctr */
@@ -88,9 +89,6 @@ struct cpu_cf_events {
unsigned int sets; /* # Counter set saved in memory */
};
-/* Per-CPU event structure for the counter facility */
-static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
-
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;
@@ -103,6 +101,221 @@ static debug_info_t *cf_dbg;
*/
static struct cpumf_ctr_info cpumf_ctr_info;
+struct cpu_cf_ptr {
+ struct cpu_cf_events *cpucf;
+};
+
+static struct cpu_cf_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via call back function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure.
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_copy_all(): This function is protected by cpus_read_lock(), so
+ * CPU hot plug remove can not happen. Event removal requires a close()
+ * first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}():
+ * All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
+{
+ struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
+
+ if (p) {
+ struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
+
+ return q->cpucf;
+ }
+ return NULL;
+}
+
+static struct cpu_cf_events *this_cpu_cfhw(void)
+{
+ return get_cpu_cfhw(smp_processor_id());
+}
+
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
+{
+ lcctl(0);
+}
+
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+ if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+ return;
+ free_percpu(cpu_cf_root.cfptr);
+ cpu_cf_root.cfptr = NULL;
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n",
+ __func__, refcount_read(&cpu_cf_root.refcnt),
+ cpu_cf_root.cfptr);
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+ int rc = 0;
+
+ if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+ return rc;
+
+ /* The memory is already zeroed. */
+ cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+ if (cpu_cf_root.cfptr) {
+ refcount_set(&cpu_cf_root.refcnt, 1);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ } else {
+ rc = -ENOMEM;
+ }
+
+ return rc;
+}
+
+/* Free CPU counter data structure for a PMU */
+static void cpum_cf_free_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+
+ mutex_lock(&pmc_reserve_mutex);
+ /*
+ * When invoked via CPU hotplug handler, there might be no events
+ * installed or that particular CPU might not have an
+ * event installed. This anchor pointer can be NULL!
+ */
+ if (!cpu_cf_root.cfptr)
+ goto out;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+ /*
+ * Might be zero when called from CPU hotplug handler and no event
+ * installed on that CPU, but on different CPUs.
+ */
+ if (!cpuhw)
+ goto out;
+
+ if (refcount_dec_and_test(&cpuhw->refcnt)) {
+ kfree(cpuhw);
+ p->cpucf = NULL;
+ }
+ cpum_cf_free_root();
+out:
+ mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+ int rc;
+
+ mutex_lock(&pmc_reserve_mutex);
+ rc = cpum_cf_alloc_root();
+ if (rc)
+ goto unlock;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+
+ if (!cpuhw) {
+ cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+ if (cpuhw) {
+ p->cpucf = cpuhw;
+ refcount_set(&cpuhw->refcnt, 1);
+ } else {
+ rc = -ENOMEM;
+ }
+ } else {
+ refcount_inc(&cpuhw->refcnt);
+ }
+ if (rc) {
+ /*
+ * Error in allocation of event, decrement anchor. Since
+ * cpu_cf_event in not created, its destroy() function is not
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ cpum_cf_free_root();
+ }
+unlock:
+ mutex_unlock(&pmc_reserve_mutex);
+ return rc;
+}
+
+/*
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all CPUs. This setup handles
+ * perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is non-zero install event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
+ */
+static int cpum_cf_alloc(int cpu)
+{
+ cpumask_var_t mask;
+ int rc;
+
+ if (cpu == -1) {
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ for_each_online_cpu(cpu) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (rc) {
+ for_each_cpu(cpu, mask)
+ cpum_cf_free_cpu(cpu);
+ break;
+ }
+ cpumask_set_cpu(cpu, mask);
+ }
+ free_cpumask_var(mask);
+ } else {
+ rc = cpum_cf_alloc_cpu(cpu);
+ }
+ return rc;
+}
+
+static void cpum_cf_free(int cpu)
+{
+ if (cpu == -1) {
+ for_each_online_cpu(cpu)
+ cpum_cf_free_cpu(cpu);
+ } else {
+ cpum_cf_free_cpu(cpu);
+ }
+}
+
#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
/* interval in seconds */
@@ -451,10 +664,10 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
*/
static void cpumf_pmu_enable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int err;
- if (cpuhw->flags & PMU_F_ENABLED)
+ if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
return;
err = lcctl(cpuhw->state | cpuhw->dev_state);
@@ -471,11 +684,11 @@ static void cpumf_pmu_enable(struct pmu *pmu)
*/
static void cpumf_pmu_disable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- int err;
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
u64 inactive;
+ int err;
- if (!(cpuhw->flags & PMU_F_ENABLED))
+ if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
return;
inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
@@ -487,58 +700,10 @@ static void cpumf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}
-#define PMC_INIT 0UL
-#define PMC_RELEASE 1UL
-
-static void cpum_cf_setup_cpu(void *flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- switch ((unsigned long)flags) {
- case PMC_INIT:
- cpuhw->flags |= PMU_F_RESERVED;
- break;
-
- case PMC_RELEASE:
- cpuhw->flags &= ~PMU_F_RESERVED;
- break;
- }
-
- /* Disable CPU counter sets */
- lcctl(0);
- debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n",
- __func__, *(int *)flags, cpuhw->flags,
- cpuhw->state);
-}
-
-/* Initialize the CPU-measurement counter facility */
-static int __kernel_cpumcf_begin(void)
-{
- on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1);
- irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
-}
-
-/* Release the CPU-measurement counter facility */
-static void __kernel_cpumcf_end(void)
-{
- on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1);
- irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-
-/* Number of perf events counting hardware events */
-static atomic_t num_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- __kernel_cpumcf_end();
- mutex_unlock(&pmc_reserve_mutex);
+ cpum_cf_free(event->cpu);
}
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -562,14 +727,6 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static void cpumf_hw_inuse(void)
-{
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_inc_return(&num_events) == 1)
- __kernel_cpumcf_begin();
- mutex_unlock(&pmc_reserve_mutex);
-}
-
static int is_userspace_event(u64 ev)
{
return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
@@ -653,7 +810,8 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
/*
@@ -756,7 +914,7 @@ static void cpumf_pmu_read(struct perf_event *event)
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -830,7 +988,7 @@ static int cfdiag_push_sample(struct perf_event *event,
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -857,8 +1015,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
false);
if (cfdiag_diffctr(cpuhw, event->hw.config_base))
cfdiag_push_sample(event, cpuhw);
- } else if (cpuhw->flags & PMU_F_RESERVED) {
- /* Only update when PMU not hotplugged off */
+ } else {
hw_perf_event_update(event);
}
hwc->state |= PERF_HES_UPTODATE;
@@ -867,7 +1024,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
ctr_set_enable(&cpuhw->state, event->hw.config_base);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -880,7 +1037,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int i;
cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -912,29 +1069,83 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
-static int cpum_cf_setup(unsigned int cpu, unsigned long flags)
-{
- local_irq_disable();
- cpum_cf_setup_cpu((void *)flags);
- local_irq_enable();
- return 0;
-}
+static struct cfset_session { /* CPUs and counter set bit mask */
+ struct list_head head; /* Head of list of active processes */
+} cfset_session = {
+ .head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */
+/*
+ * Synchronize access to device /dev/hwc. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwc.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+/*
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done on kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor can not be created
+ * and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ * pmu_delete(). The event itself is removed when the file descriptor is
+ * closed.
+ */
static int cfset_online_cpu(unsigned int cpu);
+
static int cpum_cf_online_cpu(unsigned int cpu)
{
- debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__,
- cpu, in_interrupt());
- cpum_cf_setup(cpu, PMC_INIT);
- return cfset_online_cpu(cpu);
+ int rc = 0;
+
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d "
+ "opencnt %d\n", __func__, cpu,
+ refcount_read(&cpu_cf_root.refcnt),
+ refcount_read(&cfset_opencnt));
+ /*
+ * Ignore notification for perf_event_open().
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (!rc)
+ cfset_online_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return rc;
}
static int cfset_offline_cpu(unsigned int cpu);
+
static int cpum_cf_offline_cpu(unsigned int cpu)
{
- debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu);
- cfset_offline_cpu(cpu);
- return cpum_cf_setup(cpu, PMC_RELEASE);
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n",
+ __func__, cpu, refcount_read(&cpu_cf_root.refcnt),
+ refcount_read(&cfset_opencnt));
+ /*
+ * During task exit processing of grouped perf events triggered by CPU
+ * hotplug processing, pmu_disable() is called as part of perf context
+ * removal process. Therefore do not trigger event removal now for
+ * perf_event_open() created events. Perf common code triggers event
+ * destruction when the event file descriptor is closed.
+ *
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ cfset_offline_cpu(cpu);
+ cpum_cf_free_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
}
/* Return true if store counter set multiple instruction is available */
@@ -953,13 +1164,13 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
return;
inc_irq_stat(IRQEXT_CMC);
- cpuhw = this_cpu_ptr(&cpu_cf_events);
/*
* Measurement alerts are shared and might happen when the PMU
* is not reserved. Ignore these alerts in this case.
*/
- if (!(cpuhw->flags & PMU_F_RESERVED))
+ cpuhw = this_cpu_cfhw();
+ if (!cpuhw)
return;
/* counter authorization change alert */
@@ -1039,19 +1250,11 @@ out1:
* counter set via normal file operations.
*/
-static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */
-static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
unsigned int sets; /* Counter set bit mask */
atomic_t cpus_ack; /* # CPUs successfully executed func */
};
-static struct cfset_session { /* CPUs and counter set bit mask */
- struct list_head head; /* Head of list of active processes */
-} cfset_session = {
- .head = LIST_HEAD_INIT(cfset_session.head)
-};
-
struct cfset_request { /* CPUs and counter set bit mask */
unsigned long ctrset; /* Bit mask of counter set to read */
cpumask_t mask; /* CPU mask to read from */
@@ -1113,11 +1316,11 @@ static void cfset_session_add(struct cfset_request *p)
/* Stop all counter sets via ioctl interface */
static void cfset_ioctl_off(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
- /* Check if any counter set used by /dev/hwc */
+ /* Check if any counter set used by /dev/hwctr */
for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
if ((p->sets & cpumf_ctr_ctl[rc])) {
if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
@@ -1141,7 +1344,7 @@ static void cfset_ioctl_off(void *parm)
/* Start counter sets on particular CPU */
static void cfset_ioctl_on(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
@@ -1163,7 +1366,7 @@ static void cfset_ioctl_on(void *parm)
static void cfset_release_cpu(void *p)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int rc;
debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
@@ -1203,27 +1406,41 @@ static int cfset_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
}
- if (!atomic_dec_return(&cfset_opencnt))
+ if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */
on_each_cpu(cfset_release_cpu, NULL, 1);
+ cpum_cf_free(-1);
+ }
mutex_unlock(&cfset_ctrset_mutex);
-
- hw_perf_event_destroy(NULL);
return 0;
}
+/*
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
+ */
static int cfset_open(struct inode *inode, struct file *file)
{
- if (!capable(CAP_SYS_ADMIN))
+ int rc = 0;
+
+ if (!perfmon_capable())
return -EPERM;
+ file->private_data = NULL;
+
mutex_lock(&cfset_ctrset_mutex);
- if (atomic_inc_return(&cfset_opencnt) == 1)
- cfset_session_init();
+ if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */
+ rc = cpum_cf_alloc(-1);
+ if (!rc) {
+ cfset_session_init();
+ refcount_set(&cfset_opencnt, 1);
+ }
+ }
mutex_unlock(&cfset_ctrset_mutex);
- cpumf_hw_inuse();
- file->private_data = NULL;
/* nonseekable_open() never fails */
- return nonseekable_open(inode, file);
+ return rc ?: nonseekable_open(inode, file);
}
static int cfset_all_start(struct cfset_request *req)
@@ -1280,7 +1497,7 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
ctrset_read = (struct s390_ctrset_read __user *)arg;
uptr = ctrset_read->data;
for_each_cpu(cpu, mask) {
- struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
+ struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
struct s390_ctrset_cpudata __user *ctrset_cpudata;
ctrset_cpudata = uptr;
@@ -1324,7 +1541,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
/* Read all counter sets. */
static void cfset_cpu_read(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int set, set_size;
size_t space;
@@ -1348,9 +1565,9 @@ static void cfset_cpu_read(void *parm)
cpuhw->used += space;
cpuhw->sets += 1;
}
+ debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
+ cpuhw->sets, cpuhw->used);
}
- debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
- cpuhw->sets, cpuhw->used);
}
static int cfset_all_read(unsigned long arg, struct cfset_request *req)
@@ -1502,6 +1719,7 @@ static struct miscdevice cfset_dev = {
.name = S390_HWCTR_DEVICE,
.minor = MISC_DYNAMIC_MINOR,
.fops = &cfset_fops,
+ .mode = 0666,
};
/* Hotplug add of a CPU. Scan through all active processes and add
@@ -1512,7 +1730,6 @@ static int cfset_online_cpu(unsigned int cpu)
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1520,19 +1737,18 @@ static int cfset_online_cpu(unsigned int cpu)
cpumask_set_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
/* Hotplug remove of a CPU. Scan through all active processes and clear
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
*/
static int cfset_offline_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1540,7 +1756,6 @@ static int cfset_offline_cpu(unsigned int cpu)
cpumask_clear_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
@@ -1618,7 +1833,8 @@ static int cfdiag_event_init(struct perf_event *event)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
err = cfdiag_event_init2(event);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 7ef72f5ff52e..8ecfbce4ac92 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1271,16 +1271,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
}
}
-static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
-{
- asm volatile(
- " cdsg %[old],%[new],%[ptr]\n"
- : [old] "+d" (old), [ptr] "+QS" (*ptr)
- : [new] "d" (new)
- : "memory", "cc");
- return old;
-}
-
/* hw_perf_event_update() - Process sampling buffer
* @event: The perf event
* @flush_all: Flag to also flush partially filled sample-data-blocks
@@ -1352,7 +1342,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
new.f = 0;
new.a = 1;
new.overflow = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
/* Advance to next sample-data-block */
@@ -1562,7 +1552,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
}
new.a = 1;
new.overflow = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
return true;
}
@@ -1636,7 +1626,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
new.a = 1;
else
new.a = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
*overflow += orig_overflow;
}
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index a7b339c4fd7c..fe7d1774ded1 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -36,7 +36,7 @@ struct paicrypt_map {
unsigned long *page; /* Page for CPU to store counters */
struct pai_userdata *save; /* Page to store no-zero counters */
unsigned int active_events; /* # of PAI crypto users */
- unsigned int refcnt; /* Reference count mapped buffers */
+ refcount_t refcnt; /* Reference count mapped buffers */
enum paievt_mode mode; /* Type of event */
struct perf_event *event; /* Perf event for sampling */
};
@@ -57,10 +57,11 @@ static void paicrypt_event_destroy(struct perf_event *event)
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
- " mode %d refcnt %d\n", __func__,
+ " mode %d refcnt %u\n", __func__,
event->attr.config, event->cpu,
- cpump->active_events, cpump->mode, cpump->refcnt);
- if (!--cpump->refcnt) {
+ cpump->active_events, cpump->mode,
+ refcount_read(&cpump->refcnt));
+ if (refcount_dec_and_test(&cpump->refcnt)) {
debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
__func__, (unsigned long)cpump->page,
cpump->save);
@@ -149,8 +150,10 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
/* Allocate memory for counter page and counter extraction.
* Only the first counting event has to allocate a page.
*/
- if (cpump->page)
+ if (cpump->page) {
+ refcount_inc(&cpump->refcnt);
goto unlock;
+ }
rc = -ENOMEM;
cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
@@ -164,18 +167,18 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
goto unlock;
}
rc = 0;
+ refcount_set(&cpump->refcnt, 1);
unlock:
/* If rc is non-zero, do not set mode and reference count */
if (!rc) {
- cpump->refcnt++;
cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
: PAI_MODE_COUNTING;
}
debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
- " mode %d refcnt %d page %#lx save %p rc %d\n",
+ " mode %d refcnt %u page %#lx save %p rc %d\n",
__func__, a->sample_period, cpump->active_events,
- cpump->mode, cpump->refcnt,
+ cpump->mode, refcount_read(&cpump->refcnt),
(unsigned long)cpump->page, cpump->save, rc);
mutex_unlock(&pai_reserve_mutex);
return rc;
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index fcea307d7529..3b4f384f77f7 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -50,7 +50,7 @@ struct paiext_map {
struct pai_userdata *save; /* Area to store non-zero counters */
enum paievt_mode mode; /* Type of event */
unsigned int active_events; /* # of PAI Extension users */
- unsigned int refcnt;
+ refcount_t refcnt;
struct perf_event *event; /* Perf event for sampling */
struct paiext_cb *paiext_cb; /* PAI extension control block area */
};
@@ -60,14 +60,14 @@ struct paiext_mapptr {
};
static struct paiext_root { /* Anchor to per CPU data */
- int refcnt; /* Overall active events */
+ refcount_t refcnt; /* Overall active events */
struct paiext_mapptr __percpu *mapptr;
} paiext_root;
/* Free per CPU data when the last event is removed. */
static void paiext_root_free(void)
{
- if (!--paiext_root.refcnt) {
+ if (refcount_dec_and_test(&paiext_root.refcnt)) {
free_percpu(paiext_root.mapptr);
paiext_root.mapptr = NULL;
}
@@ -80,7 +80,7 @@ static void paiext_root_free(void)
*/
static int paiext_root_alloc(void)
{
- if (++paiext_root.refcnt == 1) {
+ if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
/* The memory is already zeroed. */
paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
if (!paiext_root.mapptr) {
@@ -91,6 +91,7 @@ static int paiext_root_alloc(void)
*/
return -ENOMEM;
}
+ refcount_set(&paiext_root.refcnt, 1);
}
return 0;
}
@@ -122,7 +123,7 @@ static void paiext_event_destroy(struct perf_event *event)
mutex_lock(&paiext_reserve_mutex);
cpump->event = NULL;
- if (!--cpump->refcnt) /* Last reference gone */
+ if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
mutex_unlock(&paiext_reserve_mutex);
@@ -163,7 +164,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
rc = -ENOMEM;
cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
if (!cpump)
- goto unlock;
+ goto undo;
/* Allocate memory for counter area and counter extraction.
* These are
@@ -183,8 +184,9 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
GFP_KERNEL);
if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
paiext_free(mp);
- goto unlock;
+ goto undo;
}
+ refcount_set(&cpump->refcnt, 1);
cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
: PAI_MODE_COUNTING;
} else {
@@ -195,15 +197,15 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
if (cpump->mode == PAI_MODE_SAMPLING ||
(cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
rc = -EBUSY;
- goto unlock;
+ goto undo;
}
+ refcount_inc(&cpump->refcnt);
}
rc = 0;
cpump->event = event;
- ++cpump->refcnt;
-unlock:
+undo:
if (rc) {
/* Error in allocation of event, decrement anchor. Since
* the event in not created, its destroy() function is never
@@ -211,6 +213,7 @@ unlock:
*/
paiext_root_free();
}
+unlock:
mutex_unlock(&paiext_reserve_mutex);
/* If rc is non-zero, no increment of counter/sampler was done. */
return rc;
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b68f47541169..a6935af2235c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -453,3 +453,4 @@
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat sys_cachestat
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 6b7b6d5e3632..276278199c44 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -102,6 +102,11 @@ void __init time_early_init(void)
((long) qui.old_leap * 4096000000L);
}
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+ return tod_to_ns(__get_tod_clock_monotonic());
+}
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 273a0281a189..66f0eb1c872b 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -302,6 +302,8 @@ again:
rc = -ENXIO;
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ if (!ptep)
+ goto out;
if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
page = pte_page(*ptep);
rc = -EAGAIN;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index da6dac36e959..9bd0a873f3b1 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2777,7 +2777,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
mmap_read_lock(kvm->mm);
get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
- &page, NULL, NULL);
+ &page, NULL);
mmap_read_unlock(kvm->mm);
return page;
}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 580d2e3265cb..7c50eca85ca4 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,7 @@
# Makefile for s390-specific library files..
#
-lib-y += delay.o string.o uaccess.o find.o spinlock.o
+lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
obj-y += mem.o xor.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S
new file mode 100644
index 000000000000..de33cf02cfd2
--- /dev/null
+++ b/arch/s390/lib/tishift.S
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/export.h>
+
+ .section .noinstr.text, "ax"
+
+ GEN_BR_THUNK %r14
+
+SYM_FUNC_START(__ashlti3)
+ lmg %r0,%r1,0(%r3)
+ cije %r4,0,1f
+ lhi %r3,64
+ sr %r3,%r4
+ jnh 0f
+ srlg %r3,%r1,0(%r3)
+ sllg %r0,%r0,0(%r4)
+ sllg %r1,%r1,0(%r4)
+ ogr %r0,%r3
+ j 1f
+0: sllg %r0,%r1,-64(%r4)
+ lghi %r1,0
+1: stmg %r0,%r1,0(%r2)
+ BR_EX %r14
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)
+ lmg %r0,%r1,0(%r3)
+ cije %r4,0,1f
+ lhi %r3,64
+ sr %r3,%r4
+ jnh 0f
+ sllg %r3,%r0,0(%r3)
+ srlg %r1,%r1,0(%r4)
+ srag %r0,%r0,0(%r4)
+ ogr %r1,%r3
+ j 1f
+0: srag %r1,%r0,-64(%r4)
+ srag %r0,%r0,63
+1: stmg %r0,%r1,0(%r2)
+ BR_EX %r14
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)
+ lmg %r0,%r1,0(%r3)
+ cije %r4,0,1f
+ lhi %r3,64
+ sr %r3,%r4
+ jnh 0f
+ sllg %r3,%r0,0(%r3)
+ srlg %r1,%r1,0(%r4)
+ srlg %r0,%r0,0(%r4)
+ ogr %r1,%r3
+ j 1f
+0: srlg %r1,%r0,-64(%r4)
+ lghi %r0,0
+1: stmg %r0,%r1,0(%r2)
+ BR_EX %r14
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index b65144c392b0..dbe8394234e2 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -457,8 +457,9 @@ retry:
if (unlikely(vma->vm_start > address)) {
if (!(vma->vm_flags & VM_GROWSDOWN))
goto out_up;
- if (expand_stack(vma, address))
- goto out_up;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto out;
}
/*
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index dc90d1eb0d55..f4b6fc746fce 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -895,12 +895,12 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
/**
* gmap_pte_op_end - release the page table lock
- * @ptl: pointer to the spinlock pointer
+ * @ptep: pointer to the locked pte
+ * @ptl: pointer to the page table spinlock
*/
-static void gmap_pte_op_end(spinlock_t *ptl)
+static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
- if (ptl)
- spin_unlock(ptl);
+ pte_unmap_unlock(ptep, ptl);
}
/**
@@ -1011,7 +1011,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
{
int rc;
pte_t *ptep;
- spinlock_t *ptl = NULL;
+ spinlock_t *ptl;
unsigned long pbits = 0;
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
@@ -1025,7 +1025,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
/* Protect and unlock. */
rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(ptep, ptl);
return rc;
}
@@ -1154,7 +1154,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
/* Do *NOT* clear the _PAGE_INVALID bit! */
rc = 0;
}
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(ptep, ptl);
}
if (!rc)
break;
@@ -1248,7 +1248,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
if (!rc)
gmap_insert_rmap(sg, vmaddr, rmap);
spin_unlock(&sg->guest_table_lock);
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(ptep, ptl);
}
radix_tree_preload_end();
if (rc) {
@@ -2156,7 +2156,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
if (!tptep) {
spin_unlock(&sg->guest_table_lock);
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(sptep, ptl);
radix_tree_preload_end();
break;
}
@@ -2167,7 +2167,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
rmap = NULL;
rc = 0;
}
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(sptep, ptl);
spin_unlock(&sg->guest_table_lock);
}
radix_tree_preload_end();
@@ -2495,7 +2495,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
continue;
if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
set_bit(i, bitmap);
- spin_unlock(ptl);
+ pte_unmap_unlock(ptep, ptl);
}
}
gmap_pmd_op_end(gmap, pmdp);
@@ -2537,7 +2537,12 @@ static inline void thp_split_mm(struct mm_struct *mm)
* Remove all empty zero pages from the mapping for lazy refaulting
* - This must be called after mm->context.has_pgste is set, to avoid
* future creation of zero pages
- * - This must be called after THP was enabled
+ * - This must be called after THP was disabled.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * racing with the loop below and so causing pte_offset_map_lock() to fail,
+ * it will never insert a page table containing empty zero pages once
+ * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
*/
static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
unsigned long end, struct mm_walk *walk)
@@ -2549,6 +2554,8 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
spinlock_t *ptl;
ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!ptep)
+ break;
if (is_zero_pfn(pte_pfn(*ptep)))
ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
pte_unmap_unlock(ptep, ptl);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 5ba3bd8a7b12..ca5a418c58a8 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -4,6 +4,7 @@
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
#include <linux/hugetlb.h>
+#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6effb24de6d9..3bd2ab2a9a34 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -829,7 +829,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
default:
return -EFAULT;
}
-
+again:
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
@@ -850,6 +850,8 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
spin_unlock(ptl);
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!ptep)
+ goto again;
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
@@ -938,7 +940,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
default:
return -EFAULT;
}
-
+again:
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
@@ -955,6 +957,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
spin_unlock(ptl);
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!ptep)
+ goto again;
new = old = pgste_get_lock(ptep);
/* Reset guest reference bit only */
pgste_val(new) &= ~PGSTE_GR_BIT;
@@ -1000,7 +1004,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
default:
return -EFAULT;
}
-
+again:
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
@@ -1017,6 +1021,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
spin_unlock(ptl);
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!ptep)
+ goto again;
pgste = pgste_get_lock(ptep);
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
paddr = pte_val(*ptep) & PAGE_MASK;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 5b22c6e24528..b9dcb4ae6c59 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -667,7 +667,15 @@ static void __init memblock_region_swap(void *a, void *b, int size)
#ifdef CONFIG_KASAN
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static inline int set_memory_kasan(unsigned long start, unsigned long end)
+{
+ start = PAGE_ALIGN_DOWN(__sha(start));
+ end = PAGE_ALIGN(__sha(end));
+ return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
+}
#endif
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -737,10 +745,8 @@ void __init vmem_map_init(void)
}
#ifdef CONFIG_KASAN
- for_each_mem_range(i, &base, &end) {
- set_memory_rwnx(__sha(base),
- (__sha(end) - __sha(base)) >> PAGE_SHIFT);
- }
+ for_each_mem_range(i, &base, &end)
+ set_memory_kasan(base, end);
#endif
set_memory_rox((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT);
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index cc8cf5abea15..4e930f566878 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -10,7 +10,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)