From 0188d08a46ffe4a39c6b463451a41d8b503d04d6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 12 Jun 2020 13:06:07 +0200 Subject: s390: convert to msecs_to_jiffies() Instead of using the old 'jiffies + HZ {/,*} something' calculation use msecs_to_jiffies() as that makes the code more readable. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/lgr.c | 2 +- arch/s390/kernel/time.c | 2 +- arch/s390/kernel/topology.c | 4 ++-- arch/s390/mm/cmm.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index 452502f9a0d9..3b895971c3d0 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -167,7 +167,7 @@ static struct timer_list lgr_timer; */ static void lgr_timer_set(void) { - mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ); + mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); } /* diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index b1113b519432..6bc20861fff9 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -669,7 +669,7 @@ static void stp_work_fn(struct work_struct *work) * There is a usable clock but the synchonization failed. * Retry after a second. 
*/ - mod_timer(&stp_timer, jiffies + HZ); + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); out_unlock: mutex_unlock(&stp_work_mutex); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 332b542548cd..ca47141a5be9 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -356,9 +356,9 @@ static atomic_t topology_poll = ATOMIC_INIT(0); static void set_topology_timer(void) { if (atomic_add_unless(&topology_poll, -1, 0)) - mod_timer(&topology_timer, jiffies + HZ / 10); + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); else - mod_timer(&topology_timer, jiffies + HZ * 60); + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC)); } void topology_expect_change(void) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 36bce727897b..5c15ae3daf71 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -189,7 +189,7 @@ static void cmm_set_timer(void) del_timer(&cmm_timer); return; } - mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ); + mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC)); } static void cmm_timer_fn(struct timer_list *unused) -- cgit v1.2.3 From 90ce70f06546e646713d036cfdec39427df296f7 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 12 May 2020 09:54:58 +0200 Subject: s390/pci: remove unused functions Signed-off-by: Sven Schnelle Acked-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pci_dma.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 419fac7a62c0..f62cd3ed2d44 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -131,12 +131,6 @@ static inline void validate_st_entry(unsigned long *entry) *entry |= ZPCI_TABLE_VALID; } -static inline void invalidate_table_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry |= 
ZPCI_TABLE_INVALID; -} - static inline void invalidate_pt_entry(unsigned long *entry) { WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); @@ -173,11 +167,6 @@ static inline int pt_entry_isvalid(unsigned long entry) return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; } -static inline int entry_isprotected(unsigned long entry) -{ - return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED; -} - static inline unsigned long *get_rt_sto(unsigned long entry) { return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) -- cgit v1.2.3 From 7fa0d6ff35cfaae9cc7012d9220cd24400c650f1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 12 May 2020 09:55:18 +0200 Subject: s390/time: remove unused function Signed-off-by: Sven Schnelle Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/asm/timex.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 6bf3a45ccfec..289aaff4d365 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -49,11 +49,6 @@ static inline void set_clock_comparator(__u64 time) asm volatile("sckc %0" : : "Q" (time)); } -static inline void store_clock_comparator(__u64 *time) -{ - asm volatile("stckc %0" : "=Q" (*time)); -} - void clock_comparator_work(void); void __init time_early_init(void); -- cgit v1.2.3 From ecb1ff6833c461ea3bcf16396cd4f1eb50b119c2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 18 Jun 2020 07:09:57 +0200 Subject: s390/debug: remove raw view There is not a single user of the debug raw view. Therefore remove it before anybody uses it. If anybody would make use of the view it would expose the struct __debug_entry definition to userspace and really would make it uapi. This wouldn't be good, since the definition is suboptimal and needs to be changed. Right now the structure definition is only defined to be uapi, however there is no user. 
Signed-off-by: Heiko Carstens --- Documentation/s390/s390dbf.rst | 17 ++++------------- arch/s390/include/asm/debug.h | 1 - arch/s390/kernel/debug.c | 42 ------------------------------------------ 3 files changed, 4 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/Documentation/s390/s390dbf.rst b/Documentation/s390/s390dbf.rst index cdb36842b898..af8bdc3629e7 100644 --- a/Documentation/s390/s390dbf.rst +++ b/Documentation/s390/s390dbf.rst @@ -67,7 +67,7 @@ corresponding component. The debugfs normally should be mounted to The content of the directories are files which represent different views to the debug log. Each component can decide which views should be used through registering them with the function :c:func:`debug_register_view()`. -Predefined views for hex/ascii, sprintf and raw binary data are provided. +Predefined views for hex/ascii and sprintf data are provided. It is also possible to define other views. The content of a view can be inspected simply by reading the corresponding debugfs file. @@ -119,8 +119,6 @@ Predefined views: extern struct debug_view debug_hex_ascii_view; - extern struct debug_view debug_raw_view; - extern struct debug_view debug_sprintf_view; Examples @@ -129,7 +127,7 @@ Examples .. code-block:: c /* - * hex_ascii- + raw-view Example + * hex_ascii-view Example */ #include @@ -143,7 +141,6 @@ Examples debug_info = debug_register("test", 1, 4, 4 ); debug_register_view(debug_info, &debug_hex_ascii_view); - debug_register_view(debug_info, &debug_raw_view); debug_text_event(debug_info, 4 , "one "); debug_int_exception(debug_info, 4, 4711); @@ -201,7 +198,7 @@ debugfs-files: Example:: > ls /sys/kernel/debug/s390dbf/dasd - flush hex_ascii level pages raw + flush hex_ascii level pages > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | .... 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE @@ -298,10 +295,9 @@ order to see the debug entries well formatted. 
Predefined Views ---------------- -There are three predefined views: hex_ascii, raw and sprintf. +There are two predefined views: hex_ascii and sprintf. The hex_ascii view shows the data field in hex and ascii representation (e.g. ``45 43 4b 44 | ECKD``). -The raw view returns a bytestream as the debug areas are stored in memory. The sprintf view formats the debug entries in the same way as the sprintf function would do. The sprintf event/exception functions write to the @@ -334,11 +330,6 @@ The format of the hex_ascii and sprintf view is as follows: - Return Address to caller - data field -The format of the raw view is: - -- Header as described in debug.h -- datafield - A typical line of the hex_ascii view will look like the following (first line is only for explanation and will not be displayed when 'cating' the view):: diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 310134015541..d39da8f3130e 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -82,7 +82,6 @@ struct debug_view { }; extern struct debug_view debug_hex_ascii_view; -extern struct debug_view debug_raw_view; extern struct debug_view debug_sprintf_view; /* do NOT use the _common functions */ diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 263075a1af36..beb4b44a11d1 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -90,27 +90,11 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, size_t user_buf_size, loff_t *offset); static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, const char *in_buf); -static int debug_raw_format_fn(debug_info_t *id, - struct debug_view *view, char *out_buf, - const char *in_buf); -static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf); - static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, 
debug_sprintf_entry_t *curr_event); /* globals */ -struct debug_view debug_raw_view = { - "raw", - NULL, - &debug_raw_header_fn, - &debug_raw_format_fn, - NULL, - NULL -}; -EXPORT_SYMBOL(debug_raw_view); - struct debug_view debug_hex_ascii_view = { "hex_ascii", NULL, @@ -1385,32 +1369,6 @@ out: return rc; /* number of input characters */ } -/* - * prints debug header in raw format - */ -static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view, - int area, debug_entry_t *entry, char *out_buf) -{ - int rc; - - rc = sizeof(debug_entry_t); - memcpy(out_buf, entry, sizeof(debug_entry_t)); - return rc; -} - -/* - * prints debug data in raw format - */ -static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, const char *in_buf) -{ - int rc; - - rc = id->buf_size; - memcpy(out_buf, in_buf, id->buf_size); - return rc; -} - /* * prints debug data in hex/ascii format */ -- cgit v1.2.3 From 6ffb3f6b46d0d02c318946047dc5ce6553495848 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 18 Jun 2020 07:41:18 +0200 Subject: s390/debug: remove struct __debug_entry from uapi There is no interface to userspace which exposes anything that would require the struct __debug_entry definition. Therefore remove it from uapi. This allows to change the definition, since it is only kernel internally used. The only exception is the crash utility, however that tool must handle changes all the time anyway. 
Signed-off-by: Heiko Carstens --- arch/s390/include/asm/debug.h | 17 ++++++++++++++++- arch/s390/include/uapi/asm/debug.h | 35 ----------------------------------- 2 files changed, 16 insertions(+), 36 deletions(-) delete mode 100644 arch/s390/include/uapi/asm/debug.h (limited to 'arch') diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index d39da8f3130e..17a26261f288 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */ #define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */ @@ -26,6 +26,21 @@ #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */ /* the entry information */ +#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ + +struct __debug_entry { + union { + struct { + unsigned long clock : 52; + unsigned long exception : 1; + unsigned long level : 3; + unsigned long cpuid : 8; + } fields; + unsigned long stck; + } id; + void *caller; +} __packed; + typedef struct __debug_entry debug_entry_t; struct debug_view; diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h deleted file mode 100644 index c7c564d9aea4..000000000000 --- a/arch/s390/include/uapi/asm/debug.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S/390 debug facility - * - * Copyright IBM Corp. 1999, 2000 - */ - -#ifndef _UAPIDEBUG_H -#define _UAPIDEBUG_H - -#include - -/* Note: - * struct __debug_entry must be defined outside of #ifdef __KERNEL__ - * in order to allow a user program to analyze the 'raw'-view. 
- */ - -struct __debug_entry{ - union { - struct { - unsigned long long clock:52; - unsigned long long exception:1; - unsigned long long level:3; - unsigned long long cpuid:8; - } fields; - - unsigned long long stck; - } id; - void* caller; -} __attribute__((packed)); - - -#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */ - -#endif /* _UAPIDEBUG_H */ -- cgit v1.2.3 From 28ccce5f50af2e9484d6b74b22ff9eb54bb775a2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 16:29:30 -0500 Subject: s390/appldata: use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle, and audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Message-Id: <20200617212930.GA11728@embeddedor> Signed-off-by: Heiko Carstens --- arch/s390/appldata/appldata_os.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 5503217366ec..a363d30ce739 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -129,8 +129,7 @@ static void appldata_get_os_data(void *data) os_data->nr_cpus = j; - new_size = sizeof(struct appldata_os_data) + - (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu)); + new_size = struct_size(os_data, os_cpu, os_data->nr_cpus); if (ops.size != new_size) { if (ops.active) { rc = appldata_diag(APPLDATA_RECORD_OS_ID, @@ -165,8 +164,7 @@ static int __init appldata_os_init(void) { int rc, max_size; - max_size = sizeof(struct appldata_os_data) + - (num_possible_cpus() * sizeof(struct appldata_os_per_cpu)); + max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus()); if (max_size > APPLDATA_MAX_REC_SIZE) { pr_err("Maximum OS record size %i exceeds the maximum " "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE); -- cgit v1.2.3 From
66a049b764a71dc32031b7b533f98fc0299e6e11 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 25 Jun 2020 21:53:17 +0200 Subject: s390/stp: allow group and users to read stp sysfs files There are no secrets in these files, so allow all users to read it. Signed-off-by: Sven Schnelle Acked-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6bc20861fff9..700127ba689d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -683,7 +683,7 @@ static struct bus_type stp_subsys = { .dev_name = "stp", }; -static ssize_t stp_ctn_id_show(struct device *dev, +static ssize_t ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -693,9 +693,9 @@ static ssize_t stp_ctn_id_show(struct device *dev, *(unsigned long long *) stp_info.ctnid); } -static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); +static DEVICE_ATTR_RO(ctn_id); -static ssize_t stp_ctn_type_show(struct device *dev, +static ssize_t ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -704,9 +704,9 @@ static ssize_t stp_ctn_type_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.ctn); } -static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); +static DEVICE_ATTR_RO(ctn_type); -static ssize_t stp_dst_offset_show(struct device *dev, +static ssize_t dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -715,9 +715,9 @@ static ssize_t stp_dst_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); } -static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); +static DEVICE_ATTR_RO(dst_offset); -static ssize_t stp_leap_seconds_show(struct device *dev, +static ssize_t leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -726,9 
+726,9 @@ static ssize_t stp_leap_seconds_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); } -static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); +static DEVICE_ATTR_RO(leap_seconds); -static ssize_t stp_stratum_show(struct device *dev, +static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -737,9 +737,9 @@ static ssize_t stp_stratum_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); } -static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); +static DEVICE_ATTR_RO(stratum); -static ssize_t stp_time_offset_show(struct device *dev, +static ssize_t time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -748,9 +748,9 @@ static ssize_t stp_time_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int) stp_info.tto); } -static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); +static DEVICE_ATTR_RO(time_offset); -static ssize_t stp_time_zone_offset_show(struct device *dev, +static ssize_t time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -759,10 +759,9 @@ static ssize_t stp_time_zone_offset_show(struct device *dev, return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); } -static DEVICE_ATTR(time_zone_offset, 0400, - stp_time_zone_offset_show, NULL); +static DEVICE_ATTR_RO(time_zone_offset); -static ssize_t stp_timing_mode_show(struct device *dev, +static ssize_t timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -771,9 +770,9 @@ static ssize_t stp_timing_mode_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.tmd); } -static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); +static DEVICE_ATTR_RO(timing_mode); -static ssize_t stp_timing_state_show(struct device *dev, +static ssize_t timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -782,16 +781,16 @@ static ssize_t 
stp_timing_state_show(struct device *dev, return sprintf(buf, "%i\n", stp_info.tst); } -static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); +static DEVICE_ATTR_RO(timing_state); -static ssize_t stp_online_show(struct device *dev, +static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%i\n", stp_online); } -static ssize_t stp_online_store(struct device *dev, +static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -817,18 +816,14 @@ static ssize_t stp_online_store(struct device *dev, * Can't use DEVICE_ATTR because the attribute should be named * stp/online but dev_attr_online already exists in this file .. */ -static struct device_attribute dev_attr_stp_online = { - .attr = { .name = "online", .mode = 0600 }, - .show = stp_online_show, - .store = stp_online_store, -}; +static DEVICE_ATTR_RW(online); static struct device_attribute *stp_attributes[] = { &dev_attr_ctn_id, &dev_attr_ctn_type, &dev_attr_dst_offset, &dev_attr_leap_seconds, - &dev_attr_stp_online, + &dev_attr_online, &dev_attr_stratum, &dev_attr_time_offset, &dev_attr_time_zone_offset, -- cgit v1.2.3 From f05f62d04271faa265c7a4f75638ebc380d182fa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 25 Jun 2020 17:00:29 +0200 Subject: s390/vmem: get rid of memory segment list I can't come up with a satisfying reason why we still need the memory segment list. We used to represent in the list: - boot memory - standby memory added via add_memory() - loaded dcss segments When loading/unloading dcss segments, we already track them in a separate list and check for overlaps (arch/s390/mm/extmem.c:segment_overlaps_others()) when loading segments. 
The overlap check was introduced for some segments in commit b2300b9efe1b ("[S390] dcssblk: add >2G DCSSs support and stacked contiguous DCSSs support.") and was extended to cover all dcss segments in commit ca57114609d1 ("s390/extmem: remove code for 31 bit addressing mode"). Although I doubt that overlaps with boot memory and standby memory are relevant, let's reshuffle the checks in load_segment() to request the resource first. This will bail out in case we have overlaps with other resources (esp. boot memory and standby memory). The order is now different compared to segment_unload() and segment_unload(), but that should not matter. This smells like a leftover from ancient times, let's get rid of it. We can now convert vmem_remove_mapping() into a void function - everybody ignored the return value already. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Andrew Morton Signed-off-by: David Hildenbrand Message-Id: <20200625150029.45019-1-david@redhat.com> Reviewed-by: Gerald Schaefer Tested-by: Gerald Schaefer [DCSS] Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/mm/extmem.c | 25 ++++----- arch/s390/mm/vmem.c | 115 +++------------------------------------- 3 files changed, 21 insertions(+), 121 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 19d603bd1f36..7eb01a5459cd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1669,7 +1669,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define kern_addr_valid(addr) (1) extern int vmem_add_mapping(unsigned long start, unsigned long size); -extern int vmem_remove_mapping(unsigned long start, unsigned long size); +extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); diff --git a/arch/s390/mm/extmem.c 
b/arch/s390/mm/extmem.c index 9e0aa7aa03ba..105c09282f8c 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -313,15 +313,10 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long goto out_free; } - rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); - - if (rc) - goto out_free; - seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (seg->res == NULL) { rc = -ENOMEM; - goto out_shared; + goto out_free; } seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; seg->res->start = seg->start_addr; @@ -335,12 +330,17 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc == SEG_TYPE_SC || ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared)) seg->res->flags |= IORESOURCE_READONLY; + + /* Check for overlapping resources before adding the mapping. */ if (request_resource(&iomem_resource, seg->res)) { rc = -EBUSY; - kfree(seg->res); - goto out_shared; + goto out_free_resource; } + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); + if (rc) + goto out_resource; + if (do_nonshared) diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name, &start_addr, &end_addr); @@ -351,14 +351,14 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); rc = diag_cc; - goto out_resource; + goto out_mapping; } if (diag_cc > 1) { pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr); rc = dcss_diag_translate_rc(end_addr); dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); - goto out_resource; + goto out_mapping; } seg->start_addr = start_addr; seg->end = end_addr; @@ -377,11 +377,12 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long (void*) seg->end, segtype_string[seg->vm_segtype]); } goto out; + out_mapping: + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_resource: release_resource(seg->res); + 
out_free_resource: kfree(seg->res); - out_shared: - vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8b6282cf7d13..3b9e71654c37 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -20,14 +20,6 @@ static DEFINE_MUTEX(vmem_mutex); -struct memory_segment { - struct list_head list; - unsigned long start; - unsigned long size; -}; - -static LIST_HEAD(mem_segs); - static void __ref *vmem_alloc_pages(unsigned int order) { unsigned long size = PAGE_SIZE << order; @@ -300,94 +292,25 @@ void vmemmap_free(unsigned long start, unsigned long end, { } -/* - * Add memory segment to the segment list if it doesn't overlap with - * an already present segment. - */ -static int insert_memory_segment(struct memory_segment *seg) -{ - struct memory_segment *tmp; - - if (seg->start + seg->size > VMEM_MAX_PHYS || - seg->start + seg->size < seg->start) - return -ERANGE; - - list_for_each_entry(tmp, &mem_segs, list) { - if (seg->start >= tmp->start + tmp->size) - continue; - if (seg->start + seg->size <= tmp->start) - continue; - return -ENOSPC; - } - list_add(&seg->list, &mem_segs); - return 0; -} - -/* - * Remove memory segment from the segment list. 
- */ -static void remove_memory_segment(struct memory_segment *seg) -{ - list_del(&seg->list); -} - -static void __remove_shared_memory(struct memory_segment *seg) +void vmem_remove_mapping(unsigned long start, unsigned long size) { - remove_memory_segment(seg); - vmem_remove_range(seg->start, seg->size); -} - -int vmem_remove_mapping(unsigned long start, unsigned long size) -{ - struct memory_segment *seg; - int ret; - mutex_lock(&vmem_mutex); - - ret = -ENOENT; - list_for_each_entry(seg, &mem_segs, list) { - if (seg->start == start && seg->size == size) - break; - } - - if (seg->start != start || seg->size != size) - goto out; - - ret = 0; - __remove_shared_memory(seg); - kfree(seg); -out: + vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); - return ret; } int vmem_add_mapping(unsigned long start, unsigned long size) { - struct memory_segment *seg; int ret; - mutex_lock(&vmem_mutex); - ret = -ENOMEM; - seg = kzalloc(sizeof(*seg), GFP_KERNEL); - if (!seg) - goto out; - seg->start = start; - seg->size = size; - - ret = insert_memory_segment(seg); - if (ret) - goto out_free; + if (start + size > VMEM_MAX_PHYS || + start + size < start) + return -ERANGE; + mutex_lock(&vmem_mutex); ret = vmem_add_mem(start, size); if (ret) - goto out_remove; - goto out; - -out_remove: - __remove_shared_memory(seg); -out_free: - kfree(seg); -out: + vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); return ret; } @@ -421,27 +344,3 @@ void __init vmem_map_init(void) pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } - -/* - * Convert memblock.memory to a memory segment list so there is a single - * list that contains all memory segments. 
- */ -static int __init vmem_convert_memory_chunk(void) -{ - struct memblock_region *reg; - struct memory_segment *seg; - - mutex_lock(&vmem_mutex); - for_each_memblock(memory, reg) { - seg = kzalloc(sizeof(*seg), GFP_KERNEL); - if (!seg) - panic("Out of memory...\n"); - seg->start = reg->base; - seg->size = reg->size; - insert_memory_segment(seg); - } - mutex_unlock(&vmem_mutex); - return 0; -} - -core_initcall(vmem_convert_memory_chunk); -- cgit v1.2.3 From 24840e76bf8a679d26d373a0edc44284bfd9dc18 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2020 11:16:16 +0200 Subject: s390/smp: move smp_cpus_done() to header file Saves us a couple of bytes. Signed-off-by: Heiko Carstens --- arch/s390/include/asm/smp.h | 4 ++++ arch/s390/kernel/smp.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7326f110d48c..20b37b059e2b 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -54,6 +54,10 @@ static inline int smp_get_base_cpu(int cpu) return cpu - (cpu % (smp_cpu_mtid + 1)); } +static inline void smp_cpus_done(unsigned int max_cpus) +{ +} + extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e6be63ff162a..b4f2795a123d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1012,10 +1012,6 @@ void __init smp_prepare_boot_cpu(void) smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } -void __init smp_cpus_done(unsigned int max_cpus) -{ -} - void __init smp_setup_processor_id(void) { pcpu_devices[0].address = stap(); -- cgit v1.2.3 From 8e1398f8987851bb266c1d8d911752a18e1d05b4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 1 Jul 2020 11:17:52 +0200 Subject: s390/smp: add missing linebreak Signed-off-by: Heiko Carstens --- arch/s390/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) 
(limited to 'arch') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b4f2795a123d..f685a38f166d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1141,6 +1141,7 @@ static int smp_cpu_online(unsigned int cpu) return sysfs_create_group(&s->kobj, &cpu_online_attr_group); } + static int smp_cpu_pre_down(unsigned int cpu) { struct device *s = &per_cpu(cpu_device, cpu)->dev; -- cgit v1.2.3 From 0ef5d691aae0322cbab0807c184ba534536a4698 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 30 Jun 2020 10:42:40 +0200 Subject: s390/extmem: remove stale -ENOSPC comment and handling segment_load() will no longer return -ENOSPC. If a segment overlaps with storage, we now also return -EBUSY. Remove the stale comment from __segment_load() and the stale handling from segment_warning(). Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Andrew Morton Suggested-by: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200630084240.8283-1-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/extmem.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 105c09282f8c..5060956b8e7d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -401,8 +401,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long * -EIO : could not perform query or load diagnose * -ENOENT : no such segment * -EOPNOTSUPP: multi-part segment cannot be used with linux - * -ENOSPC : segment cannot be used (overlaps with storage) - * -EBUSY : segment can temporarily not be used (overlaps with dcss) + * -EBUSY : segment cannot be used (overlaps with dcss or storage) * -ERANGE : segment cannot be used (exceeds kernel mapping range) * -EPERM : segment is currently loaded with incompatible permissions * -ENOMEM : out of memory @@ -627,10 +626,6 @@ void segment_warning(int rc, char *seg_name) pr_err("DCSS %s has multiple page ranges 
and cannot be " "loaded or queried\n", seg_name); break; - case -ENOSPC: - pr_err("DCSS %s overlaps with used storage and cannot " - "be loaded\n", seg_name); - break; case -EBUSY: pr_err("%s needs used memory resources and cannot be " "loaded or queried\n", seg_name); -- cgit v1.2.3 From 74ecbef7b90800e368809642ecc671ba4a57ab09 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 30 Apr 2020 12:23:29 +0200 Subject: s390/zcrypt: code beautification and struct field renames Some beautifications related to the internal only used struct ap_message and related code. Instead of one int carrying only the special flag now a u32 flags field is used. At struct CPRBX the pointers to additional data are now marked with __user. This caused some changes needed on code, where these structs are also used within the zcrypt misc functions. The ica_rsa_* structs now use the generic types __u8, __u32, ... instead of char, unsigned int. zcrypt_msg6 and zcrypt_msg50 use min_t() instead of min(). Signed-off-by: Harald Freudenberger Signed-off-by: Heiko Carstens --- arch/s390/include/uapi/asm/zcrypt.h | 140 ++++++++++++++++----------------- drivers/s390/crypto/ap_bus.h | 11 +-- drivers/s390/crypto/ap_queue.c | 9 ++- drivers/s390/crypto/zcrypt_ccamisc.c | 69 ++++++++-------- drivers/s390/crypto/zcrypt_cex2c.c | 15 ++-- drivers/s390/crypto/zcrypt_error.h | 4 +- drivers/s390/crypto/zcrypt_msgtype50.c | 64 +++++++-------- drivers/s390/crypto/zcrypt_msgtype6.c | 112 +++++++++++++------------- drivers/s390/crypto/zcrypt_msgtype6.h | 4 +- drivers/s390/crypto/zcrypt_queue.c | 8 +- 10 files changed, 217 insertions(+), 219 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 5a2177e96e88..22fd202856bc 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -36,12 +36,12 @@ * - length(n_modulus) = inputdatalength */ struct ica_rsa_modexpo { - char __user *inputdata; - unsigned 
int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *b_key; - char __user *n_modulus; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *b_key; + __u8 __user *n_modulus; }; /** @@ -59,15 +59,15 @@ struct ica_rsa_modexpo { * - length(u_mult_inv) = inputdatalength/2 + 8 */ struct ica_rsa_modexpo_crt { - char __user *inputdata; - unsigned int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *bp_key; - char __user *bq_key; - char __user *np_prime; - char __user *nq_prime; - char __user *u_mult_inv; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *bp_key; + __u8 __user *bq_key; + __u8 __user *np_prime; + __u8 __user *nq_prime; + __u8 __user *u_mult_inv; }; /** @@ -83,67 +83,67 @@ struct ica_rsa_modexpo_crt { * key block */ struct CPRBX { - unsigned short cprb_len; /* CPRB length 220 */ - unsigned char cprb_ver_id; /* CPRB version id. 
0x02 */ - unsigned char pad_000[3]; /* Alignment pad bytes */ - unsigned char func_id[2]; /* function id 0x5432 */ - unsigned char cprb_flags[4]; /* Flags */ - unsigned int req_parml; /* request parameter buffer len */ - unsigned int req_datal; /* request data buffer */ - unsigned int rpl_msgbl; /* reply message block length */ - unsigned int rpld_parml; /* replied parameter block len */ - unsigned int rpl_datal; /* reply data block len */ - unsigned int rpld_datal; /* replied data block len */ - unsigned int req_extbl; /* request extension block len */ - unsigned char pad_001[4]; /* reserved */ - unsigned int rpld_extbl; /* replied extension block len */ - unsigned char padx000[16 - sizeof(char *)]; - unsigned char *req_parmb; /* request parm block 'address' */ - unsigned char padx001[16 - sizeof(char *)]; - unsigned char *req_datab; /* request data block 'address' */ - unsigned char padx002[16 - sizeof(char *)]; - unsigned char *rpl_parmb; /* reply parm block 'address' */ - unsigned char padx003[16 - sizeof(char *)]; - unsigned char *rpl_datab; /* reply data block 'address' */ - unsigned char padx004[16 - sizeof(char *)]; - unsigned char *req_extb; /* request extension block 'addr'*/ - unsigned char padx005[16 - sizeof(char *)]; - unsigned char *rpl_extb; /* reply extension block 'address'*/ - unsigned short ccp_rtcode; /* server return code */ - unsigned short ccp_rscode; /* server reason code */ - unsigned int mac_data_len; /* Mac Data Length */ - unsigned char logon_id[8]; /* Logon Identifier */ - unsigned char mac_value[8]; /* Mac Value */ - unsigned char mac_content_flgs;/* Mac content flag byte */ - unsigned char pad_002; /* Alignment */ - unsigned short domain; /* Domain */ - unsigned char usage_domain[4];/* Usage domain */ - unsigned char cntrl_domain[4];/* Control domain */ - unsigned char S390enf_mask[4];/* S/390 enforcement mask */ - unsigned char pad_004[36]; /* reserved */ + __u16 cprb_len; /* CPRB length 220 */ + __u8 cprb_ver_id; /* CPRB version 
id. 0x02 */ + __u8 pad_000[3]; /* Alignment pad bytes */ + __u8 func_id[2]; /* function id 0x5432 */ + __u8 cprb_flags[4]; /* Flags */ + __u32 req_parml; /* request parameter buffer len */ + __u32 req_datal; /* request data buffer */ + __u32 rpl_msgbl; /* reply message block length */ + __u32 rpld_parml; /* replied parameter block len */ + __u32 rpl_datal; /* reply data block len */ + __u32 rpld_datal; /* replied data block len */ + __u32 req_extbl; /* request extension block len */ + __u8 pad_001[4]; /* reserved */ + __u32 rpld_extbl; /* replied extension block len */ + __u8 padx000[16 - sizeof(__u8 *)]; + __u8 __user *req_parmb; /* request parm block 'address' */ + __u8 padx001[16 - sizeof(__u8 *)]; + __u8 __user *req_datab; /* request data block 'address' */ + __u8 padx002[16 - sizeof(__u8 *)]; + __u8 __user *rpl_parmb; /* reply parm block 'address' */ + __u8 padx003[16 - sizeof(__u8 *)]; + __u8 __user *rpl_datab; /* reply data block 'address' */ + __u8 padx004[16 - sizeof(__u8 *)]; + __u8 __user *req_extb; /* request extension block 'addr'*/ + __u8 padx005[16 - sizeof(__u8 *)]; + __u8 __user *rpl_extb; /* reply extension block 'address'*/ + __u16 ccp_rtcode; /* server return code */ + __u16 ccp_rscode; /* server reason code */ + __u32 mac_data_len; /* Mac Data Length */ + __u8 logon_id[8]; /* Logon Identifier */ + __u8 mac_value[8]; /* Mac Value */ + __u8 mac_content_flgs; /* Mac content flag byte */ + __u8 pad_002; /* Alignment */ + __u16 domain; /* Domain */ + __u8 usage_domain[4]; /* Usage domain */ + __u8 cntrl_domain[4]; /* Control domain */ + __u8 S390enf_mask[4]; /* S/390 enforcement mask */ + __u8 pad_004[36]; /* reserved */ } __attribute__((packed)); /** * xcRB */ struct ica_xcRB { - unsigned short agent_ID; - unsigned int user_defined; - unsigned short request_ID; - unsigned int request_control_blk_length; - unsigned char padding1[16 - sizeof(char *)]; - char __user *request_control_blk_addr; - unsigned int request_data_length; - char padding2[16 - 
sizeof(char *)]; - char __user *request_data_address; - unsigned int reply_control_blk_length; - char padding3[16 - sizeof(char *)]; - char __user *reply_control_blk_addr; - unsigned int reply_data_length; - char padding4[16 - sizeof(char *)]; - char __user *reply_data_addr; - unsigned short priority_window; - unsigned int status; + __u16 agent_ID; + __u32 user_defined; + __u16 request_ID; + __u32 request_control_blk_length; + __u8 _padding1[16 - sizeof(__u8 *)]; + __u8 __user *request_control_blk_addr; + __u32 request_data_length; + __u8 _padding2[16 - sizeof(__u8 *)]; + __u8 __user *request_data_address; + __u32 reply_control_blk_length; + __u8 _padding3[16 - sizeof(__u8 *)]; + __u8 __user *reply_control_blk_addr; + __u32 reply_data_length; + __u8 __padding4[16 - sizeof(__u8 *)]; + __u8 __user *reply_data_addr; + __u16 priority_window; + __u32 status; } __attribute__((packed)); /** diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 053cc34d2ca2..69432e93643a 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -190,17 +190,18 @@ typedef enum ap_wait (ap_func_t)(struct ap_queue *queue); struct ap_message { struct list_head list; /* Request queueing. */ unsigned long long psmid; /* Message id. */ - void *message; /* Pointer to message buffer. */ - size_t length; /* Message length. */ + void *msg; /* Pointer to message buffer. */ + unsigned int len; /* Message length. */ + u32 flags; /* Flags, see AP_MSG_FLAG_xxx */ int rc; /* Return code for this message */ - void *private; /* ap driver private pointer. */ - unsigned int special:1; /* Used for special commands. */ /* receive is called from tasklet context */ void (*receive)(struct ap_queue *, struct ap_message *, struct ap_message *); }; +#define AP_MSG_FLAG_SPECIAL (1 << 16) /* flag msg as 'special' with NQAP */ + /** * ap_init_message() - Initialize ap_message. * Initialize a message before using. 
Otherwise this might result in @@ -218,7 +219,7 @@ static inline void ap_init_message(struct ap_message *ap_msg) */ static inline void ap_release_message(struct ap_message *ap_msg) { - kzfree(ap_msg->message); + kzfree(ap_msg->msg); kzfree(ap_msg->private); } diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 73b077dca3e6..d6cc384f294b 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -69,9 +69,9 @@ static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind) */ static inline struct ap_queue_status __ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length, - unsigned int special) + int special) { - if (special == 1) + if (special) qid |= 0x400000UL; return ap_nqap(qid, psmid, msg, length); } @@ -137,7 +137,7 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) struct ap_message *ap_msg; status = ap_dqap(aq->qid, &aq->reply->psmid, - aq->reply->message, aq->reply->length); + aq->reply->msg, aq->reply->len); switch (status.response_code) { case AP_RESPONSE_NORMAL: aq->queue_count--; @@ -216,7 +216,8 @@ static enum ap_wait ap_sm_write(struct ap_queue *aq) /* Start the next request on the queue. 
*/ ap_msg = list_entry(aq->requestq.next, struct ap_message, list); status = __ap_send(aq->qid, ap_msg->psmid, - ap_msg->message, ap_msg->length, ap_msg->special); + ap_msg->msg, ap_msg->len, + ap_msg->flags & AP_MSG_FLAG_SPECIAL); switch (status.response_code) { case AP_RESPONSE_NORMAL: aq->queue_count++; diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 1b835398feec..3f5b61351cde 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -205,9 +205,9 @@ static int alloc_and_prep_cprbmem(size_t paramblen, preqcblk->rpl_msgbl = cprbplusparamblen; if (paramblen) { preqcblk->req_parmb = - ((u8 *) preqcblk) + sizeof(struct CPRBX); + ((u8 __user *) preqcblk) + sizeof(struct CPRBX); preqcblk->rpl_parmb = - ((u8 *) prepcblk) + sizeof(struct CPRBX); + ((u8 __user *) prepcblk) + sizeof(struct CPRBX); } *pcprbmem = cprbmem; @@ -274,7 +274,7 @@ int cca_genseckey(u16 cardnr, u16 domain, { int i, rc, keysize; int seckeysize; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct kgreqparm { @@ -320,7 +320,7 @@ int cca_genseckey(u16 cardnr, u16 domain, preqcblk->domain = domain; /* fill request cprb param block with KG request */ - preqparm = (struct kgreqparm *) preqcblk->req_parmb; + preqparm = (struct kgreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "KG", 2); preqparm->rule_array_len = sizeof(preqparm->rule_array_len); preqparm->lv1.len = sizeof(struct lv1); @@ -377,8 +377,9 @@ int cca_genseckey(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct kgrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct kgrepparm *) ptr; /* check length of the returned secure key token */ seckeysize = prepparm->lv3.keyblock.toklen @@ -415,7 +416,7 @@ int 
cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize, const u8 *clrkey, u8 seckey[SECKEYBLOBSIZE]) { int rc, keysize, seckeysize; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct cmreqparm { @@ -460,7 +461,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize, preqcblk->domain = domain; /* fill request cprb param block with CM request */ - preqparm = (struct cmreqparm *) preqcblk->req_parmb; + preqparm = (struct cmreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "CM", 2); memcpy(preqparm->rule_array, "AES ", 8); preqparm->rule_array_len = @@ -514,8 +515,9 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct cmrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct cmrepparm *) ptr; /* check length of the returned secure key token */ seckeysize = prepparm->lv3.keyblock.toklen @@ -554,7 +556,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, u8 *protkey, u32 *protkeylen, u32 *protkeytype) { int rc; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct uskreqparm { @@ -605,7 +607,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, preqcblk->domain = domain; /* fill request cprb param block with USK request */ - preqparm = (struct uskreqparm *) preqcblk->req_parmb; + preqparm = (struct uskreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "US", 2); preqparm->rule_array_len = sizeof(preqparm->rule_array_len); preqparm->lv1.len = sizeof(struct lv1); @@ -646,8 +648,9 @@ int cca_sec2protkey(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct uskrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); 
+ prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct uskrepparm *) ptr; /* check the returned keyblock */ if (prepparm->lv3.ckb.version != 0x01 && @@ -714,7 +717,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, u8 *keybuf, size_t *keybufsize) { int rc; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct gkreqparm { @@ -796,7 +799,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, preqcblk->req_parml = sizeof(struct gkreqparm); /* prepare request param block with GK request */ - preqparm = (struct gkreqparm *) preqcblk->req_parmb; + preqparm = (struct gkreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "GK", 2); preqparm->rule_array_len = sizeof(uint16_t) + 2 * 8; memcpy(preqparm->rule_array, "AES OP ", 2*8); @@ -867,8 +870,9 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct gkrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct gkrepparm *) ptr; /* do some plausibility checks on the key block */ if (prepparm->kb.len < 120 + 5 * sizeof(uint16_t) || @@ -917,7 +921,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, int *key_token_size) { int rc, n; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct rule_array_block { @@ -974,7 +978,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, preqcblk->req_parml = 0; /* prepare request param block with IP request */ - preq_ra_block = (struct rule_array_block *) preqcblk->req_parmb; + preq_ra_block = (struct rule_array_block __force *) preqcblk->req_parmb; memcpy(preq_ra_block->subfunc_code, "IP", 2); preq_ra_block->rule_array_len = sizeof(uint16_t) + 2 * 8; memcpy(preq_ra_block->rule_array, 
rule_array_1, 8); @@ -987,7 +991,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, } /* prepare vud block */ - preq_vud_block = (struct vud_block *) + preq_vud_block = (struct vud_block __force *) (preqcblk->req_parmb + preqcblk->req_parml); n = complete ? 0 : (clr_key_bit_size + 7) / 8; preq_vud_block->len = sizeof(struct vud_block) + n; @@ -1001,7 +1005,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, preqcblk->req_parml += preq_vud_block->len; /* prepare key block */ - preq_key_block = (struct key_block *) + preq_key_block = (struct key_block __force *) (preqcblk->req_parmb + preqcblk->req_parml); n = *key_token_size; preq_key_block->len = sizeof(struct key_block) + n; @@ -1034,8 +1038,9 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct iprepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct iprepparm *) ptr; /* do some plausibility checks on the key block */ if (prepparm->kb.len < 120 + 3 * sizeof(uint16_t) || @@ -1151,7 +1156,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, u8 *protkey, u32 *protkeylen, u32 *protkeytype) { int rc; - u8 *mem; + u8 *mem, *ptr; struct CPRBX *preqcblk, *prepcblk; struct ica_xcRB xcrb; struct aureqparm { @@ -1208,7 +1213,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, preqcblk->domain = domain; /* fill request cprb param block with AU request */ - preqparm = (struct aureqparm *) preqcblk->req_parmb; + preqparm = (struct aureqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "AU", 2); preqparm->rule_array_len = sizeof(preqparm->rule_array_len) @@ -1257,8 +1262,9 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm 
= (struct aurepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct aurepparm *) ptr; /* check the returned keyblock */ if (prepparm->vud.ckb.version != 0x01 && @@ -1347,7 +1353,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain, preqcblk->domain = domain; /* fill request cprb param block with FQ request */ - preqparm = (struct fqreqparm *) preqcblk->req_parmb; + preqparm = (struct fqreqparm __force *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "FQ", 2); memcpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array)); preqparm->rule_array_len = @@ -1378,8 +1384,9 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain, } /* process response cprb param block */ - prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); - prepparm = (struct fqrepparm *) prepcblk->rpl_parmb; + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct fqrepparm *) ptr; ptr = prepparm->lvdata; /* check and possibly copy reply rule array */ diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index 266440168bb7..993addb726e0 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -87,24 +87,23 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) int rc, i; ap_init_message(&ap_msg); - ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; rng_type6CPRB_msgX(&ap_msg, 4, &domain); - msg = ap_msg.message; + msg = ap_msg.msg; msg->cprbx.domain = AP_QID_QUEUE(aq->qid); - rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.message, - ap_msg.length); + rc = ap_send(aq->qid, 0x0102030405060708ULL, ap_msg.msg, ap_msg.len); if (rc) goto out_free; /* Wait for the test message to complete. 
*/ for (i = 0; i < 2 * HZ; i++) { msleep(1000 / HZ); - rc = ap_recv(aq->qid, &psmid, ap_msg.message, 4096); + rc = ap_recv(aq->qid, &psmid, ap_msg.msg, 4096); if (rc == 0 && psmid == 0x0102030405060708ULL) break; } @@ -115,13 +114,13 @@ static int zcrypt_cex2c_rng_supported(struct ap_queue *aq) goto out_free; } - reply = ap_msg.message; + reply = ap_msg.msg; if (reply->cprbx.ccp_rtcode == 0 && reply->cprbx.ccp_rscode == 0) rc = 1; else rc = 0; out_free: - free_page((unsigned long) ap_msg.message); + free_page((unsigned long) ap_msg.msg); return rc; } diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 4f4dd9d727c9..54a04f8c38ef 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -80,7 +80,7 @@ struct error_hdr { static inline int convert_error(struct zcrypt_queue *zq, struct ap_message *reply) { - struct error_hdr *ehdr = reply->message; + struct error_hdr *ehdr = reply->msg; int card = AP_QID_CARD(zq->queue->qid); int queue = AP_QID_QUEUE(zq->queue->qid); @@ -127,7 +127,7 @@ static inline int convert_error(struct zcrypt_queue *zq, struct { struct type86_hdr hdr; struct type86_fmt2_ext fmt2; - } __packed * head = reply->message; + } __packed * head = reply->msg; unsigned int apfs = *((u32 *)head->fmt2.apfs); ZCRYPT_DBF(DBF_ERR, diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index fc4295b3d801..7aedc338b445 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -207,10 +207,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, mod_len = mex->inputdatalength; if (mod_len <= 128) { - struct type50_meb1_msg *meb1 = ap_msg->message; + struct type50_meb1_msg *meb1 = ap_msg->msg; memset(meb1, 0, sizeof(*meb1)); - ap_msg->length = sizeof(*meb1); + ap_msg->len = sizeof(*meb1); meb1->header.msg_type_code = TYPE50_TYPE_CODE; meb1->header.msg_len = sizeof(*meb1); meb1->keyblock_type = 
TYPE50_MEB1_FMT; @@ -218,10 +218,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, exp = meb1->exponent + sizeof(meb1->exponent) - mod_len; inp = meb1->message + sizeof(meb1->message) - mod_len; } else if (mod_len <= 256) { - struct type50_meb2_msg *meb2 = ap_msg->message; + struct type50_meb2_msg *meb2 = ap_msg->msg; memset(meb2, 0, sizeof(*meb2)); - ap_msg->length = sizeof(*meb2); + ap_msg->len = sizeof(*meb2); meb2->header.msg_type_code = TYPE50_TYPE_CODE; meb2->header.msg_len = sizeof(*meb2); meb2->keyblock_type = TYPE50_MEB2_FMT; @@ -229,10 +229,10 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, exp = meb2->exponent + sizeof(meb2->exponent) - mod_len; inp = meb2->message + sizeof(meb2->message) - mod_len; } else if (mod_len <= 512) { - struct type50_meb3_msg *meb3 = ap_msg->message; + struct type50_meb3_msg *meb3 = ap_msg->msg; memset(meb3, 0, sizeof(*meb3)); - ap_msg->length = sizeof(*meb3); + ap_msg->len = sizeof(*meb3); meb3->header.msg_type_code = TYPE50_TYPE_CODE; meb3->header.msg_len = sizeof(*meb3); meb3->keyblock_type = TYPE50_MEB3_FMT; @@ -275,10 +275,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, * 512 byte modulus (4k keys). 
*/ if (mod_len <= 128) { /* up to 1024 bit key size */ - struct type50_crb1_msg *crb1 = ap_msg->message; + struct type50_crb1_msg *crb1 = ap_msg->msg; memset(crb1, 0, sizeof(*crb1)); - ap_msg->length = sizeof(*crb1); + ap_msg->len = sizeof(*crb1); crb1->header.msg_type_code = TYPE50_TYPE_CODE; crb1->header.msg_len = sizeof(*crb1); crb1->keyblock_type = TYPE50_CRB1_FMT; @@ -289,10 +289,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, u = crb1->u + sizeof(crb1->u) - short_len; inp = crb1->message + sizeof(crb1->message) - mod_len; } else if (mod_len <= 256) { /* up to 2048 bit key size */ - struct type50_crb2_msg *crb2 = ap_msg->message; + struct type50_crb2_msg *crb2 = ap_msg->msg; memset(crb2, 0, sizeof(*crb2)); - ap_msg->length = sizeof(*crb2); + ap_msg->len = sizeof(*crb2); crb2->header.msg_type_code = TYPE50_TYPE_CODE; crb2->header.msg_len = sizeof(*crb2); crb2->keyblock_type = TYPE50_CRB2_FMT; @@ -304,10 +304,10 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, inp = crb2->message + sizeof(crb2->message) - mod_len; } else if ((mod_len <= 512) && /* up to 4096 bit key size */ (zq->zcard->max_mod_size == CEX3A_MAX_MOD_SIZE)) { - struct type50_crb3_msg *crb3 = ap_msg->message; + struct type50_crb3_msg *crb3 = ap_msg->msg; memset(crb3, 0, sizeof(*crb3)); - ap_msg->length = sizeof(*crb3); + ap_msg->len = sizeof(*crb3); crb3->header.msg_type_code = TYPE50_TYPE_CODE; crb3->header.msg_len = sizeof(*crb3); crb3->keyblock_type = TYPE50_CRB3_FMT; @@ -350,7 +350,7 @@ static int convert_type80(struct zcrypt_queue *zq, char __user *outputdata, unsigned int outputdatalength) { - struct type80_hdr *t80h = reply->message; + struct type80_hdr *t80h = reply->msg; unsigned char *data; if (t80h->len < sizeof(*t80h) + outputdatalength) { @@ -370,7 +370,7 @@ static int convert_type80(struct zcrypt_queue *zq, BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE); else BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE); - data = reply->message + t80h->len - 
outputdatalength; + data = reply->msg + t80h->len - outputdatalength; if (copy_to_user(outputdata, data, outputdatalength)) return -EFAULT; return 0; @@ -382,7 +382,7 @@ static int convert_response(struct zcrypt_queue *zq, unsigned int outputdatalength) { /* Response type byte is the second byte in the response. */ - unsigned char rtype = ((unsigned char *) reply->message)[1]; + unsigned char rtype = ((unsigned char *) reply->msg)[1]; switch (rtype) { case TYPE82_RSP_CODE: @@ -422,22 +422,20 @@ static void zcrypt_cex2a_receive(struct ap_queue *aq, .reply_code = REP82_ERROR_MACHINE_FAILURE, }; struct type80_hdr *t80h; - int length; + int len; /* Copy the reply message to the request message buffer. */ if (!reply) goto out; /* ap_msg->rc indicates the error */ - t80h = reply->message; + t80h = reply->msg; if (t80h->type == TYPE80_RSP_CODE) { if (aq->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) - length = min_t(int, - CEX2A_MAX_RESPONSE_SIZE, t80h->len); + len = min_t(int, CEX2A_MAX_RESPONSE_SIZE, t80h->len); else - length = min_t(int, - CEX3A_MAX_RESPONSE_SIZE, t80h->len); - memcpy(msg->message, reply->message, length); + len = min_t(int, CEX3A_MAX_RESPONSE_SIZE, t80h->len); + memcpy(msg->msg, reply->msg, len); } else - memcpy(msg->message, reply->message, sizeof(error_reply)); + memcpy(msg->msg, reply->msg, sizeof(error_reply)); out: complete((struct completion *) msg->private); } @@ -460,12 +458,10 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, ap_init_message(&ap_msg); if (zq->zcard->user_space_type == ZCRYPT_CEX2A) - ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, - GFP_KERNEL); + ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); else - ap_msg.message = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, - GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_cex2a_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ 
-486,7 +482,7 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, /* Signal pending. */ ap_cancel_message(zq->queue, &ap_msg); out_free: - kfree(ap_msg.message); + kfree(ap_msg.msg); return rc; } @@ -506,12 +502,10 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, ap_init_message(&ap_msg); if (zq->zcard->user_space_type == ZCRYPT_CEX2A) - ap_msg.message = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, - GFP_KERNEL); + ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); else - ap_msg.message = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, - GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_cex2a_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ -532,7 +526,7 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, /* Signal pending. */ ap_cancel_message(zq->queue, &ap_msg); out_free: - kfree(ap_msg.message); + kfree(ap_msg.msg); return rc; } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index fd1cbb2d6b3f..d77991c74c25 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -266,7 +266,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, struct function_and_rules_block fr; unsigned short length; char text[0]; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; int size; /* @@ -301,7 +301,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, msg->cprbx.req_parml = size - sizeof(msg->hdr) - sizeof(msg->cprbx); - ap_msg->length = size; + ap_msg->len = size; return 0; } @@ -336,7 +336,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, struct function_and_rules_block fr; unsigned short length; char text[0]; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; int size; /* @@ -370,7 +370,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct 
zcrypt_queue *zq, msg->fr = static_pkd_fnr; - ap_msg->length = size; + ap_msg->len = size; return 0; } @@ -400,11 +400,11 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, struct { struct type6_hdr hdr; struct CPRBX cprbx; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; int rcblen = CEIL4(xcRB->request_control_blk_length); int replylen, req_sumlen, resp_sumlen; - char *req_data = ap_msg->message + sizeof(struct type6_hdr) + rcblen; + char *req_data = ap_msg->msg + sizeof(struct type6_hdr) + rcblen; char *function_code; if (CEIL4(xcRB->request_control_blk_length) < @@ -412,10 +412,10 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, return -EINVAL; /* overflow after alignment*/ /* length checks */ - ap_msg->length = sizeof(struct type6_hdr) + + ap_msg->len = sizeof(struct type6_hdr) + CEIL4(xcRB->request_control_blk_length) + xcRB->request_data_length; - if (ap_msg->length > MSGTYPE06_MAX_MSG_SIZE) + if (ap_msg->len > MSGTYPE06_MAX_MSG_SIZE) return -EINVAL; /* @@ -480,9 +480,7 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, if (memcmp(function_code, "US", 2) == 0 || memcmp(function_code, "AU", 2) == 0) - ap_msg->special = 1; - else - ap_msg->special = 0; + ap_msg->flags |= AP_MSG_FLAG_SPECIAL; /* copy data block */ if (xcRB->request_data_length && @@ -512,7 +510,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, struct ep11_cprb cprbx; unsigned char pld_tag; /* fixed value 0x30 */ unsigned char pld_lenfmt; /* payload length format */ - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; struct pld_hdr { unsigned char func_tag; /* fixed value 0x4 */ @@ -527,7 +525,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, return -EINVAL; /* overflow after alignment*/ /* length checks */ - ap_msg->length = sizeof(struct type6_hdr) + xcRB->req_len; + ap_msg->len = sizeof(struct type6_hdr) + xcRB->req_len; if (CEIL4(xcRB->req_len) > 
MSGTYPE06_MAX_MSG_SIZE - (sizeof(struct type6_hdr))) return -EINVAL; @@ -569,7 +567,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, /* enable special processing based on the cprbs flags special bit */ if (msg->cprbx.flags & 0x20) - ap_msg->special = 1; + ap_msg->flags |= AP_MSG_FLAG_SPECIAL; return 0; } @@ -639,7 +637,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq, 0x35, 0x9D, 0xD3, 0xD3, 0xA7, 0x9D, 0x5D, 0x41, 0x6F, 0x65, 0x1B, 0xCF, 0xA9, 0x87, 0x91, 0x09 }; - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; unsigned short service_rc, service_rs; unsigned int reply_len, pad_len; char *data; @@ -713,8 +711,8 @@ static int convert_type86_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ica_xcRB *xcRB) { - struct type86_fmt2_msg *msg = reply->message; - char *data = reply->message; + struct type86_fmt2_msg *msg = reply->msg; + char *data = reply->msg; /* Copy CPRB to user */ if (copy_to_user(xcRB->reply_control_blk_addr, @@ -744,8 +742,8 @@ static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ep11_urb *xcRB) { - struct type86_fmt2_msg *msg = reply->message; - char *data = reply->message; + struct type86_fmt2_msg *msg = reply->msg; + char *data = reply->msg; if (xcRB->resp_len < msg->fmt2.count1) return -EINVAL; @@ -766,8 +764,8 @@ static int convert_type86_rng(struct zcrypt_queue *zq, struct type86_hdr hdr; struct type86_fmt2_ext fmt2; struct CPRBX cprbx; - } __packed * msg = reply->message; - char *data = reply->message; + } __packed * msg = reply->msg; + char *data = reply->msg; if (msg->cprbx.ccp_rtcode != 0 || msg->cprbx.ccp_rscode != 0) return -EINVAL; @@ -780,7 +778,7 @@ static int convert_response_ica(struct zcrypt_queue *zq, char __user *outputdata, unsigned int outputdatalength) { - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -820,7 
+818,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ica_xcRB *xcRB) { - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -853,7 +851,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, static int convert_response_ep11_xcrb(struct zcrypt_queue *zq, struct ap_message *reply, struct ep11_urb *xcRB) { - struct type86_ep11_reply *msg = reply->message; + struct type86_ep11_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -883,7 +881,7 @@ static int convert_response_rng(struct zcrypt_queue *zq, struct ap_message *reply, char *data) { - struct type86x_reply *msg = reply->message; + struct type86x_reply *msg = reply->msg; switch (msg->hdr.type) { case TYPE82_RSP_CODE: @@ -928,32 +926,30 @@ static void zcrypt_msgtype6_receive(struct ap_queue *aq, struct response_type *resp_type = (struct response_type *) msg->private; struct type86x_reply *t86r; - int length; + int len; /* Copy the reply message to the request message buffer. 
*/ if (!reply) goto out; /* ap_msg->rc indicates the error */ - t86r = reply->message; + t86r = reply->msg; if (t86r->hdr.type == TYPE86_RSP_CODE && t86r->cprbx.cprb_ver_id == 0x02) { switch (resp_type->type) { case CEXXC_RESPONSE_TYPE_ICA: - length = sizeof(struct type86x_reply) - + t86r->length - 2; - length = min(CEXXC_MAX_ICA_RESPONSE_SIZE, length); - memcpy(msg->message, reply->message, length); + len = sizeof(struct type86x_reply) + t86r->length - 2; + len = min_t(int, CEXXC_MAX_ICA_RESPONSE_SIZE, len); + memcpy(msg->msg, reply->msg, len); break; case CEXXC_RESPONSE_TYPE_XCRB: - length = t86r->fmt2.offset2 + t86r->fmt2.count2; - length = min(MSGTYPE06_MAX_MSG_SIZE, length); - memcpy(msg->message, reply->message, length); + len = t86r->fmt2.offset2 + t86r->fmt2.count2; + len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len); + memcpy(msg->msg, reply->msg, len); break; default: - memcpy(msg->message, &error_reply, - sizeof(error_reply)); + memcpy(msg->msg, &error_reply, sizeof(error_reply)); } } else - memcpy(msg->message, reply->message, sizeof(error_reply)); + memcpy(msg->msg, reply->msg, sizeof(error_reply)); out: complete(&(resp_type->work)); } @@ -977,25 +973,25 @@ static void zcrypt_msgtype6_receive_ep11(struct ap_queue *aq, struct response_type *resp_type = (struct response_type *)msg->private; struct type86_ep11_reply *t86r; - int length; + int len; /* Copy the reply message to the request message buffer. 
*/ if (!reply) goto out; /* ap_msg->rc indicates the error */ - t86r = reply->message; + t86r = reply->msg; if (t86r->hdr.type == TYPE86_RSP_CODE && t86r->cprbx.cprb_ver_id == 0x04) { switch (resp_type->type) { case CEXXC_RESPONSE_TYPE_EP11: - length = t86r->fmt2.offset1 + t86r->fmt2.count1; - length = min(MSGTYPE06_MAX_MSG_SIZE, length); - memcpy(msg->message, reply->message, length); + len = t86r->fmt2.offset1 + t86r->fmt2.count1; + len = min_t(int, MSGTYPE06_MAX_MSG_SIZE, len); + memcpy(msg->msg, reply->msg, len); break; default: - memcpy(msg->message, &error_reply, sizeof(error_reply)); + memcpy(msg->msg, &error_reply, sizeof(error_reply)); } } else { - memcpy(msg->message, reply->message, sizeof(error_reply)); + memcpy(msg->msg, reply->msg, sizeof(error_reply)); } out: complete(&(resp_type->work)); @@ -1020,8 +1016,8 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, int rc; ap_init_message(&ap_msg); - ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_msgtype6_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ -1043,7 +1039,7 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, /* Signal pending. 
*/ ap_cancel_message(zq->queue, &ap_msg); out_free: - free_page((unsigned long) ap_msg.message); + free_page((unsigned long) ap_msg.msg); return rc; } @@ -1064,8 +1060,8 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, int rc; ap_init_message(&ap_msg); - ap_msg.message = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.message) + ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg.msg) return -ENOMEM; ap_msg.receive = zcrypt_msgtype6_receive; ap_msg.psmid = (((unsigned long long) current->pid) << 32) + @@ -1088,7 +1084,7 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, ap_cancel_message(zq->queue, &ap_msg); } out_free: - free_page((unsigned long) ap_msg.message); + free_page((unsigned long) ap_msg.msg); return rc; } @@ -1107,8 +1103,8 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB, .type = CEXXC_RESPONSE_TYPE_XCRB, }; - ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg->message) + ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + @@ -1162,8 +1158,8 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb, .type = CEXXC_RESPONSE_TYPE_EP11, }; - ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg->message) + ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive_ep11; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + @@ -1193,7 +1189,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq, struct ep11_cprb cprbx; unsigned char pld_tag; /* fixed value 0x30 */ unsigned char pld_lenfmt; /* payload length format */ - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; struct pld_hdr { unsigned char func_tag; /* fixed value 0x4 */ unsigned char func_len; /* fixed value 0x4 */ @@ -1256,8 +1252,8 
@@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code, .type = CEXXC_RESPONSE_TYPE_XCRB, }; - ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg->message) + ap_msg->msg = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; ap_msg->receive = zcrypt_msgtype6_receive; ap_msg->psmid = (((unsigned long long) current->pid) << 32) + @@ -1290,7 +1286,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq, char rule[8]; short int verb_length; short int key_length; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; struct response_type *rtype = (struct response_type *)(ap_msg->private); int rc; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 41a0df5f070f..0de280a81dd4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -127,7 +127,7 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg, char rule[8]; short int verb_length; short int key_length; - } __packed * msg = ap_msg->message; + } __packed * msg = ap_msg->msg; static struct type6_hdr static_type6_hdrX = { .type = 0x06, .offset1 = 0x00000058, @@ -154,7 +154,7 @@ static inline void rng_type6CPRB_msgX(struct ap_message *ap_msg, memcpy(msg->rule, "RANDOM ", 8); msg->verb_length = 0x02; msg->key_length = 0x02; - ap_msg->length = sizeof(*msg); + ap_msg->len = sizeof(*msg); *domain = (unsigned short)msg->cprbx.domain; } diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index b7d9fa567880..8bae6ad159a7 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -107,10 +107,10 @@ struct zcrypt_queue *zcrypt_queue_alloc(size_t max_response_size) zq = kzalloc(sizeof(struct zcrypt_queue), GFP_KERNEL); if (!zq) return NULL; - zq->reply.message = kmalloc(max_response_size, GFP_KERNEL); - if (!zq->reply.message) + zq->reply.msg = kmalloc(max_response_size, 
GFP_KERNEL); + if (!zq->reply.msg) goto out_free; - zq->reply.length = max_response_size; + zq->reply.len = max_response_size; INIT_LIST_HEAD(&zq->list); kref_init(&zq->refcount); return zq; @@ -123,7 +123,7 @@ EXPORT_SYMBOL(zcrypt_queue_alloc); void zcrypt_queue_free(struct zcrypt_queue *zq) { - kfree(zq->reply.message); + kfree(zq->reply.msg); kfree(zq); } EXPORT_SYMBOL(zcrypt_queue_free); -- cgit v1.2.3 From 776499058167d9f41c8eb468e21fe2d241c0b8e6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 1 Jul 2020 16:18:29 +0200 Subject: mm/memblock: expose only minimal interface to add/walk physmem "physmem" in the memblock allocator is somewhat weird: it's not actually used for allocation, it's simply information collected during boot, which describes the unmodified physical memory map at boot time, without any standby/hotplugged memory. It's only used on s390 and is currently the only reason s390 keeps using CONFIG_ARCH_KEEP_MEMBLOCK. Physmem isn't numa aware and current users don't specify any flags. Let's hide it from the user, exposing only for_each_physmem_range(), and simplify. The interface for physmem is now really minimalistic: - memblock_physmem_add() to add ranges - for_each_physmem_range() / __next_physmem_range() to walk physmem ranges Don't place it into an __init section and don't discard it without CONFIG_ARCH_KEEP_MEMBLOCK. As we're reusing __next_mem_range(), remove the __meminit notifier to avoid section mismatch warnings once CONFIG_ARCH_KEEP_MEMBLOCK is no longer used with CONFIG_HAVE_MEMBLOCK_PHYS_MAP. While fixing up the documentation, sneak in some related cleanups. We can stop setting CONFIG_ARCH_KEEP_MEMBLOCK for s390 next.
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Mike Rapoport Cc: Andrew Morton Signed-off-by: David Hildenbrand Reviewed-by: Mike Rapoport Message-Id: <20200701141830.18749-2-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/kernel/crash_dump.c | 6 ++--- include/linux/memblock.h | 28 ++++++++++++++++++--- mm/memblock.c | 57 ++++++++++++++++++++++--------------------- 3 files changed, 55 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f96a5857bbfd..c42ce348103c 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -549,8 +549,7 @@ static int get_mem_chunk_cnt(void) int cnt = 0; u64 idx; - for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE, - MEMBLOCK_NONE, NULL, NULL, NULL) + for_each_physmem_range(idx, &oldmem_type, NULL, NULL) cnt++; return cnt; } @@ -563,8 +562,7 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) phys_addr_t start, end; u64 idx; - for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE, - MEMBLOCK_NONE, &start, &end, NULL) { + for_each_physmem_range(idx, &oldmem_type, &start, &end) { phdr->p_filesz = end - start; phdr->p_type = PT_LOAD; phdr->p_offset = start; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 017fae833d4a..9d925db0d355 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -77,16 +77,12 @@ struct memblock_type { * @current_limit: physical address of the current allocation limit * @memory: usable memory regions * @reserved: reserved memory regions - * @physmem: all physical memory */ struct memblock { bool bottom_up; /* is bottom up direction? 
*/ phys_addr_t current_limit; struct memblock_type memory; struct memblock_type reserved; -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - struct memblock_type physmem; -#endif }; extern struct memblock memblock; @@ -145,6 +141,30 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, void __memblock_free_late(phys_addr_t base, phys_addr_t size); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, + phys_addr_t *out_start, + phys_addr_t *out_end) +{ + extern struct memblock_type physmem; + + __next_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE, &physmem, type, + out_start, out_end, NULL); +} + +/** + * for_each_physmem_range - iterate through physmem areas not included in type. + * @i: u64 used as loop variable + * @type: ptr to memblock_type which excludes from the iteration, can be %NULL + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + */ +#define for_each_physmem_range(i, type, p_start, p_end) \ + for (i = 0, __next_physmem_range(&i, type, p_start, p_end); \ + i != (u64)ULLONG_MAX; \ + __next_physmem_range(&i, type, p_start, p_end)) +#endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */ + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. diff --git a/mm/memblock.c b/mm/memblock.c index 39aceafc57f6..45f198750be9 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -44,19 +44,20 @@ * in the system, for instance when the memory is restricted with * ``mem=`` command line parameter * * ``reserved`` - describes the regions that were allocated - * * ``physmap`` - describes the actual physical memory regardless of - * the possible restrictions; the ``physmap`` type is only available - * on some architectures. 
+ * * ``physmem`` - describes the actual physical memory available during + * boot regardless of the possible restrictions and memory hot(un)plug; + * the ``physmem`` type is only available on some architectures. * * Each region is represented by :c:type:`struct memblock_region` that * defines the region extents, its attributes and NUMA node id on NUMA * systems. Every memory type is described by the :c:type:`struct * memblock_type` which contains an array of memory regions along with - * the allocator metadata. The memory types are nicely wrapped with - * :c:type:`struct memblock`. This structure is statically initialzed - * at build time. The region arrays for the "memory" and "reserved" - * types are initially sized to %INIT_MEMBLOCK_REGIONS and for the - * "physmap" type to %INIT_PHYSMEM_REGIONS. + * the allocator metadata. The "memory" and "reserved" types are nicely + * wrapped with :c:type:`struct memblock`. This structure is statically + * initialized at build time. The region arrays are initially sized to + * %INIT_MEMBLOCK_REGIONS for "memory" and %INIT_MEMBLOCK_RESERVED_REGIONS + * for "reserved". The region array for "physmem" is initially sized to + * %INIT_PHYSMEM_REGIONS. * The memblock_allow_resize() enables automatic resizing of the region * arrays during addition of new regions. This feature should be used * with care so that memory allocated for the region array will not @@ -87,8 +88,8 @@ * function frees all the memory to the buddy page allocator. * * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the - * memblock data structures will be discarded after the system - * initialization completes. + * memblock data structures (except "physmem") will be discarded after the + * system initialization completes. 
*/ #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -104,7 +105,7 @@ unsigned long long max_possible_pfn; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP -static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; +static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS]; #endif struct memblock memblock __initdata_memblock = { @@ -118,17 +119,19 @@ struct memblock memblock __initdata_memblock = { .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, .reserved.name = "reserved", -#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - .physmem.regions = memblock_physmem_init_regions, - .physmem.cnt = 1, /* empty dummy entry */ - .physmem.max = INIT_PHYSMEM_REGIONS, - .physmem.name = "physmem", -#endif - .bottom_up = false, .current_limit = MEMBLOCK_ALLOC_ANYWHERE, }; +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +struct memblock_type physmem = { + .regions = memblock_physmem_init_regions, + .cnt = 1, /* empty dummy entry */ + .max = INIT_PHYSMEM_REGIONS, + .name = "physmem", +}; +#endif + int memblock_debug __initdata_memblock; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; @@ -838,7 +841,7 @@ int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size) memblock_dbg("%s: [%pa-%pa] %pS\n", __func__, &base, &end, (void *)_RET_IP_); - return memblock_add_range(&memblock.physmem, base, size, MAX_NUMNODES, 0); + return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0); } #endif @@ -1019,12 +1022,10 @@ static bool should_skip_region(struct memblock_region *m, int nid, int flags) * As both region arrays are sorted, the function advances the two indices * in lockstep and returns each intersection. 
*/ -void __init_memblock __next_mem_range(u64 *idx, int nid, - enum memblock_flags flags, - struct memblock_type *type_a, - struct memblock_type *type_b, - phys_addr_t *out_start, - phys_addr_t *out_end, int *out_nid) +void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, + struct memblock_type *type_a, + struct memblock_type *type_b, phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid) { int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; @@ -1924,7 +1925,7 @@ void __init_memblock __memblock_dump_all(void) memblock_dump(&memblock.memory); memblock_dump(&memblock.reserved); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - memblock_dump(&memblock.physmem); + memblock_dump(&physmem); #endif } @@ -2064,8 +2065,8 @@ static int __init memblock_init_debugfs(void) debugfs_create_file("reserved", 0444, root, &memblock.reserved, &memblock_debug_fops); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP - debugfs_create_file("physmem", 0444, root, - &memblock.physmem, &memblock_debug_fops); + debugfs_create_file("physmem", 0444, root, &physmem, + &memblock_debug_fops); #endif return 0; -- cgit v1.2.3 From fa49066fc326b78e7141d68387179f8968e0e1f0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 1 Jul 2020 16:18:30 +0200 Subject: s390/mm: don't set ARCH_KEEP_MEMBLOCK Commit 50be63450728 ("s390/mm: Convert bootmem to memblock") mentions "The original bootmem allocator is getting replaced by memblock. To cover the needs of the s390 kdump implementation the physical memory list is used." As we can now reference "physmem" managed in the memblock allocator after init even without ARCH_KEEP_MEMBLOCK, and s390x does no longer need other memblock metadata after boot (esp., the zcore memmap device that used it got removed), we can stop setting ARCH_KEEP_MEMBLOCK. With this change, we no longer create memblocks for standby/hotplugged memory (added via add_memory()) and free up memblock metadata (except physmem) after boot. 
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Philipp Rudo Cc: Mike Rapoport Cc: Andrew Morton Signed-off-by: David Hildenbrand Message-Id: <20200701141830.18749-3-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c7d7ede6300c..7697a1f8e819 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -102,7 +102,6 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE - select ARCH_KEEP_MEMBLOCK select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING -- cgit v1.2.3 From 7b7735c5be473473d7a4b9e31460ed8e129dcb36 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 7 Jul 2020 14:07:53 +0200 Subject: s390: fix comment regarding interrupts in svc With the removal of the critical section cleanup, we now enter the svc interrupt handler with interrupts disabled. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Signed-off-by: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 969b35b177dd..23edf196d3dc 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -370,7 +370,7 @@ EXPORT_SYMBOL(sie_exit) /* * SVC interrupt handler routine. System calls are synchronous events and - * are executed with interrupts enabled. + * are entered with interrupts disabled. */ ENTRY(system_call) -- cgit v1.2.3 From 6589c93f99894e007a1260f009018effc958ab69 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jul 2020 11:21:25 +0200 Subject: s390: add trace events for idle enter/exit Helpful for debugging. 
Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/idle.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 0d7fbdfe995a..88bb42ca5008 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "entry.h" @@ -32,11 +33,12 @@ void enabled_wait(void) PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); + trace_cpu_idle_rcuidle(1, smp_processor_id()); local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); local_irq_restore(flags); - + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); /* Account time spent with enabled wait psw loaded as idle time. */ write_seqcount_begin(&idle->seqcount); -- cgit v1.2.3 From 7904aaa8b22fa07fd5457ee4a885cf9f665cb9c4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Jul 2020 07:43:26 +0200 Subject: s390/mm: fix typo in comment Signed-off-by: Heiko Carstens --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d53c2e2ea1fd..598828517d9d 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -376,7 +376,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, * routines. * * interruption code (int_code): - * 04 Protection -> Write-Protection (suprression) + * 04 Protection -> Write-Protection (suppression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. 
-> Not present (nullification) -- cgit v1.2.3 From 3c5f2eb9695cd241c9898a01388b19a149d0b7d2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Jul 2020 07:46:40 +0200 Subject: s390/mm: avoid trimming to MAX_ORDER Trimming to MAX_ORDER was originally done in order to avoid setting HOLES_IN_ZONE, which in turn would enable a quite expensive pfn_valid() check. pfn_valid() however only checks if a struct page exists for a given pfn. With sparsemem vmemmap there are always struct pages, since memmaps are allocated for whole sections. Therefore remove the HOLES_IN_ZONE comment and the trimming. Signed-off-by: Heiko Carstens --- arch/s390/kernel/setup.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 5853c9872dfe..295a02bab64d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1126,14 +1126,6 @@ void __init setup_arch(char **cmdline_p) free_mem_detect_info(); remove_oldmem(); - /* - * Make sure all chunks are MAX_ORDER aligned so we don't need the - * extra checks that HOLES_IN_ZONE would require. - * - * Is this still required? - */ - memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); - if (is_prot_virt_host()) setup_uv(); setup_memory_end(); -- cgit v1.2.3 From 771cf196cc92a6078656548bbc073aa932c053ab Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 14 Jul 2020 08:22:21 +0200 Subject: s390/mm: allow order 10 allocations Get rid of FORCE_MAX_ZONEORDER which limited allocations to order 8 (= 1MB) and use the default, which allows for order 10 (= 4MB) allocations. Given that s390 allows less than the default this caused some memory allocation problems more or less unique to s390 from time to time.
Note: this was originally introduced with commit 684de39bd795 ("[S390] Fix IPL from NSS.") in order to support Named Saved Segments, which could start/end at an arbitrary 1 megabyte boundary and also before support for sparsemem vmemmap was enabled. Since NSS support is gone, but sparsemem vmemmap support is available this limitation can go away. Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 7697a1f8e819..0df33cffec52 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -625,10 +625,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y -config FORCE_MAX_ZONEORDER - int - default "9" - config MAX_PHYSMEM_BITS int "Maximum size of supported physical memory in bits (42-53)" range 42 53 -- cgit v1.2.3 From 88aa8939c96781089e5ace3492d818074c5c6fe9 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 29 Jun 2020 20:48:09 +0200 Subject: s390/kernel: unify EX_TABLE* implementations Replace three implementations with one using the __stringify_in_c macro conveniently "borrowed" from powerpc and microblaze. Signed-off-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens --- arch/s390/include/asm/asm-const.h | 12 ++++++++++++ arch/s390/include/asm/linkage.h | 34 +++++++++++----------------------- 2 files changed, 23 insertions(+), 23 deletions(-) create mode 100644 arch/s390/include/asm/asm-const.h (limited to 'arch') diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h new file mode 100644 index 000000000000..11f615eb0066 --- /dev/null +++ b/arch/s390/include/asm/asm-const.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_ASM_CONST_H +#define _ASM_S390_ASM_CONST_H + +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +#else +/* This version of stringify will deal with commas... */ +# define __stringify_in_c(...)
#__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +#endif +#endif /* _ASM_S390_ASM_CONST_H */ diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 7f22262b0e46..1b52c07b5642 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -2,38 +2,26 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H +#include #include #define __ALIGN .align 4, 0x07 #define __ALIGN_STR __stringify(__ALIGN) -#ifndef __ASSEMBLY__ - /* * Helper macro for exception table entries */ -#define EX_TABLE(_fault, _target) \ - ".section __ex_table,\"a\"\n" \ - ".align 4\n" \ - ".long (" #_fault ") - .\n" \ - ".long (" #_target ") - .\n" \ - ".previous\n" - -#else /* __ASSEMBLY__ */ -#define EX_TABLE(_fault, _target) \ - .section __ex_table,"a" ; \ - .align 4 ; \ - .long (_fault) - . ; \ - .long (_target) - . ; \ - .previous +#define __EX_TABLE(_section, _fault, _target) \ + stringify_in_c(.section _section,"a";) \ + stringify_in_c(.align 4;) \ + stringify_in_c(.long (_fault) - .;) \ + stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.previous) -#define EX_TABLE_DMA(_fault, _target) \ - .section .dma.ex_table, "a" ; \ - .align 4 ; \ - .long (_fault) - . ; \ - .long (_target) - . ; \ - .previous +#define EX_TABLE(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target) +#define EX_TABLE_DMA(_fault, _target) \ + __EX_TABLE(.dma.ex_table, _fault, _target) -#endif /* __ASSEMBLY__ */ #endif -- cgit v1.2.3 From 05a68e892e89c97df6650cd8cc55058002657cbc Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 30 Jun 2020 20:52:03 +0200 Subject: s390/kernel: expand exception table logic to allow new handling options This is a s390 port of commit 548acf19234d ("x86/mm: Expand the exception table logic to allow new handling options"), which is needed for implementing BPF_PROBE_MEM on s390. 
The new handler field is made 64-bit in order to allow pointing from dynamically allocated entries to handlers in kernel text. Unlike on x86, NULL is used instead of ex_handler_default. This is because exception tables are used by boot/text_dma.S, and it would be a pain to preserve ex_handler_default. The new infrastructure is ignored in early_pgm_check_handler, since there is no pt_regs. Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/asm/extable.h | 52 +++++++++++++++++++++++++++++++++++++---- arch/s390/include/asm/linkage.h | 3 ++- arch/s390/kernel/kprobes.c | 4 +--- arch/s390/kernel/traps.c | 7 ++---- arch/s390/mm/fault.c | 4 +--- scripts/sorttable.c | 41 ++++++++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index ae27f756b409..3beb294fd553 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -1,12 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __S390_EXTABLE_H #define __S390_EXTABLE_H + +#include +#include + /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * The exception table consists of three addresses: + * + * - Address of an instruction that is allowed to fault. + * - Address at which the program should continue. + * - Optional address of handler that takes pt_regs * argument and runs in + * interrupt context. + * + * No registers are modified, so it is entirely up to the continuation code + * to figure out what to do. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. 
This means when everything is well, @@ -17,6 +25,7 @@ struct exception_table_entry { int insn, fixup; + long handler; }; extern struct exception_table_entry *__start_dma_ex_table; @@ -29,6 +38,39 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) return (unsigned long)&x->fixup + x->fixup; } +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *); + +static inline ex_handler_t +ex_fixup_handler(const struct exception_table_entry *x) +{ + if (likely(!x->handler)) + return NULL; + return (ex_handler_t)((unsigned long)&x->handler + x->handler); +} + +static inline bool ex_handle(const struct exception_table_entry *x, + struct pt_regs *regs) +{ + ex_handler_t handler = ex_fixup_handler(x); + + if (unlikely(handler)) + return handler(x, regs); + regs->psw.addr = extable_fixup(x); + return true; +} + #define ARCH_HAS_RELATIVE_EXTABLE +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->handler = b->handler + delta; + b->handler = tmp.handler - delta; +} + #endif diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h index 1b52c07b5642..a0a7a2c72bd4 100644 --- a/arch/s390/include/asm/linkage.h +++ b/arch/s390/include/asm/linkage.h @@ -14,9 +14,10 @@ #define __EX_TABLE(_section, _fault, _target) \ stringify_in_c(.section _section,"a";) \ - stringify_in_c(.align 4;) \ + stringify_in_c(.align 8;) \ stringify_in_c(.long (_fault) - .;) \ stringify_in_c(.long (_target) - .;) \ + stringify_in_c(.quad 0;) \ stringify_in_c(.previous) #define EX_TABLE(_fault, _target) \ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 548d0ea9808d..d2a71d872638 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -523,10 +523,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int 
trapnr) * zero, try to fix up. */ entry = s390_search_extables(regs->psw.addr); - if (entry) { - regs->psw.addr = extable_fixup(entry); + if (entry && ex_handle(entry, regs)) return 1; - } /* * fixup_exception() could not handle it, diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ff9cc4c3290e..8d1e8a1a97df 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -50,11 +50,8 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) } else { const struct exception_table_entry *fixup; fixup = s390_search_extables(regs->psw.addr); - if (fixup) - regs->psw.addr = extable_fixup(fixup); - else { + if (!fixup || !ex_handle(fixup, regs)) die(regs, str); - } } } @@ -251,7 +248,7 @@ void monitor_event_exception(struct pt_regs *regs) case BUG_TRAP_TYPE_NONE: fixup = s390_search_extables(regs->psw.addr); if (fixup) - regs->psw.addr = extable_fixup(fixup); + ex_handle(fixup, regs); break; case BUG_TRAP_TYPE_WARN: break; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 598828517d9d..aebf9183bedd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -255,10 +255,8 @@ static noinline void do_no_context(struct pt_regs *regs) /* Are we prepared to handle this kernel fault? */ fixup = s390_search_extables(regs->psw.addr); - if (fixup) { - regs->psw.addr = extable_fixup(fixup); + if (fixup && ex_handle(fixup, regs)) return; - } /* * Oops. The kernel tried to access some bad page. 
We'll have to diff --git a/scripts/sorttable.c b/scripts/sorttable.c index ec6b5e81eba1..0ef3abfc4a51 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -255,6 +255,45 @@ static void x86_sort_relative_table(char *extab_image, int image_size) } } +static void s390_sort_relative_table(char *extab_image, int image_size) +{ + int i; + + for (i = 0; i < image_size; i += 16) { + char *loc = extab_image + i; + uint64_t handler; + + w(r((uint32_t *)loc) + i, (uint32_t *)loc); + w(r((uint32_t *)(loc + 4)) + (i + 4), (uint32_t *)(loc + 4)); + /* + * 0 is a special self-relative handler value, which means that + * handler should be ignored. It is safe, because it means that + * handler field points to itself, which should never happen. + * When creating extable-relative values, keep it as 0, since + * this should never occur either: it would mean that handler + * field points to the first extable entry. + */ + handler = r8((uint64_t *)(loc + 8)); + if (handler) + handler += i + 8; + w8(handler, (uint64_t *)(loc + 8)); + } + + qsort(extab_image, image_size / 16, 16, compare_relative_table); + + for (i = 0; i < image_size; i += 16) { + char *loc = extab_image + i; + uint64_t handler; + + w(r((uint32_t *)loc) - i, (uint32_t *)loc); + w(r((uint32_t *)(loc + 4)) - (i + 4), (uint32_t *)(loc + 4)); + handler = r8((uint64_t *)(loc + 8)); + if (handler) + handler -= i + 8; + w8(handler, (uint64_t *)(loc + 8)); + } +} + static int do_file(char const *const fname, void *addr) { int rc = -1; @@ -297,6 +336,8 @@ static int do_file(char const *const fname, void *addr) custom_sort = x86_sort_relative_table; break; case EM_S390: + custom_sort = s390_sort_relative_table; + break; case EM_AARCH64: case EM_PARISC: case EM_PPC: -- cgit v1.2.3 From 3f161e0ae863a0456d00e5a6c9c81098c62ab7fe Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 24 Jun 2020 14:55:22 +0200 Subject: s390/bpf: implement BPF_PROBE_MEM This is a s390 port of x86 commit 3dec541b2e63 ("bpf: Add support for 
BTF pointers to x86 JIT"). Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/net/bpf_jit_comp.c | 139 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f4242b894cf2..8fe7bdfc8d15 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -49,6 +49,7 @@ struct bpf_jit { int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ + int excnt; /* Number of exception table entries */ int labels[1]; /* Labels for local jumps */ }; @@ -588,6 +589,84 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) } } +static int get_probe_mem_regno(const u8 *insn) +{ + /* + * insn must point to llgc, llgh, llgf or lg, which have destination + * register at the same position. + */ + if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */ + return -1; + if (insn[5] != 0x90 && /* llgc */ + insn[5] != 0x91 && /* llgh */ + insn[5] != 0x16 && /* llgf */ + insn[5] != 0x04) /* lg */ + return -1; + return insn[1] >> 4; +} + +static bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs) +{ + int regno; + u8 *insn; + + regs->psw.addr = extable_fixup(x); + insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16); + regno = get_probe_mem_regno(insn); + if (WARN_ON_ONCE(regno < 0)) + /* JIT bug - unexpected instruction. */ + return false; + regs->gprs[regno] = 0; + return true; +} + +static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp, + int probe_prg, int nop_prg) +{ + struct exception_table_entry *ex; + s64 delta; + u8 *insn; + int prg; + int i; + + if (!fp->aux->extable) + /* Do nothing during early JIT passes. 
*/ + return 0; + insn = jit->prg_buf + probe_prg; + if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0)) + /* JIT bug - unexpected probe instruction. */ + return -1; + if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg)) + /* JIT bug - gap between probe and nop instructions. */ + return -1; + for (i = 0; i < 2; i++) { + if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries)) + /* Verifier bug - not enough entries. */ + return -1; + ex = &fp->aux->extable[jit->excnt]; + /* Add extable entries for probe and nop instructions. */ + prg = i == 0 ? probe_prg : nop_prg; + delta = jit->prg_buf + prg - (u8 *)&ex->insn; + if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX)) + /* JIT bug - code and extable must be close. */ + return -1; + ex->insn = delta; + /* + * Always land on the nop. Note that extable infrastructure + * ignores fixup field, it is handled by ex_handler_bpf(). + */ + delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup; + if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX)) + /* JIT bug - landing pad and extable must be close. 
*/ + return -1; + ex->fixup = delta; + ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + jit->excnt++; + } + return 0; +} + /* * Compile one eBPF instruction into s390x code * @@ -604,7 +683,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, u32 *addrs = jit->addrs; s32 imm = insn->imm; s16 off = insn->off; + int probe_prg = -1; unsigned int mask; + int nop_prg; + int err; + + if (BPF_CLASS(insn->code) == BPF_LDX && + BPF_MODE(insn->code) == BPF_PROBE_MEM) + probe_prg = jit->prg; switch (insn->code) { /* @@ -1119,6 +1205,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* llgc %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; @@ -1126,6 +1213,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* llgh %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; @@ -1133,6 +1221,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* llgf %dst,off(%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off); @@ -1140,6 +1229,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* lg %dst,0(off,%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off); @@ -1485,6 +1575,23 @@ branch_oc: pr_err("Unknown opcode %02x\n", insn->code); 
return -1; } + + if (probe_prg != -1) { + /* + * Handlers of certain exceptions leave psw.addr pointing to + * the instruction directly after the failing one. Therefore, + * create two exception table entries and also add a nop in + * case two probing instructions come directly after each + * other. + */ + nop_prg = jit->prg; + /* bcr 0,%0 */ + _EMIT2(0x0700); + err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg); + if (err < 0) + return err; + } + return insn_count; } @@ -1527,6 +1634,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit32 = jit->lit32_start; jit->lit64 = jit->lit64_start; jit->prg = 0; + jit->excnt = 0; bpf_jit_prologue(jit, stack_depth); if (bpf_set_addr(jit, 0) < 0) @@ -1551,6 +1659,12 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit64_start = ALIGN(jit->lit64_start, 8); jit->size = jit->lit64_start + lit64_size; jit->size_prg = jit->prg; + + if (WARN_ON_ONCE(fp->aux->extable && + jit->excnt != fp->aux->num_exentries)) + /* Verifier bug - too many entries. */ + return -1; + return 0; } @@ -1565,6 +1679,29 @@ struct s390_jit_data { int pass; }; +static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit, + struct bpf_prog *fp) +{ + struct bpf_binary_header *header; + u32 extable_size; + u32 code_size; + + /* We need two entries per insn. 
*/ + fp->aux->num_exentries *= 2; + + code_size = roundup(jit->size, + __alignof__(struct exception_table_entry)); + extable_size = fp->aux->num_exentries * + sizeof(struct exception_table_entry); + header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf, + 8, jit_fill_hole); + if (!header) + return NULL; + fp->aux->extable = (struct exception_table_entry *) + (jit->prg_buf + code_size); + return header; +} + /* * Compile eBPF program "fp" */ @@ -1631,7 +1768,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* * Final pass: Allocate and generate program */ - header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole); + header = bpf_jit_alloc(&jit, fp); if (!header) { fp = orig_fp; goto free_addrs; -- cgit v1.2.3 From 58e15716feb562cdba57e99d62c525a1faa37c08 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jul 2020 14:15:02 +0200 Subject: s390/time: use CLOCKSOURCE_MASK Make use of CLOCKSOURCE_MASK instead of open-coding it. Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 700127ba689d..317059684847 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -253,7 +253,7 @@ static struct clocksource clocksource_tod = { .name = "tod", .rating = 400, .read = read_tod_clock, - .mask = -1ULL, + .mask = CLOCKSOURCE_MASK(64), .mult = 1000, .shift = 12, .flags = CLOCK_SOURCE_IS_CONTINUOUS, -- cgit v1.2.3 From 555701a714f77e01490f633c1080cf97f0ede1f0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jul 2020 14:16:03 +0200 Subject: s390/time: select CLOCKSOURCE_VALIDATE_LAST_CYCLE The value returned by read_tod_clock() will overflow on September 17th 2042. To prevent system time from jumping backwards, select CLOCKSOURCE_VALIDATE_LAST_CYCLE, which enables a sanity check that prevents negative "delta" values.
Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0df33cffec52..d95d323cf213 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -125,6 +125,7 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC + select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY -- cgit v1.2.3 From 411155820bb348e71ecc5b1db147b36af98cbc96 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jul 2020 14:28:36 +0200 Subject: s390/time: improve comparison for tod steering It doesn't make sense to add zero shifted by 15. It's still zero. Signed-off-by: Heiko Carstens --- arch/s390/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 317059684847..513e59d08a55 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -237,7 +237,7 @@ static u64 read_tod_clock(struct clocksource *cs) preempt_disable(); /* protect from changes to steering parameters */ now = get_tod_clock(); adj = tod_steering_end - now; - if (unlikely((s64) adj >= 0)) + if (unlikely((s64) adj > 0)) /* * manually steer by 1 cycle every 2^16 cycles. This * corresponds to shifting the tod delta by 15. 1s is -- cgit v1.2.3 From 4631f3ca493a7c8f9f31aef45fc0fc0e182155b7 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 7 Jul 2020 16:42:19 +0200 Subject: s390/pci: clarify comment in s390_mmio_read/write The existing comment was talking about reading in the write part and vice versa. While we are here make it more clear why restricting the syscalls to MIO capable devices is okay. 
Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_mmio.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 38efa3e852c4..401cf670a243 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -155,10 +155,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, return -EINVAL; /* - * Only support read access to MIO capable devices on a MIO enabled - * system. Otherwise we would have to check for every address if it is - * a special ZPCI_ADDR and we would have to do a get_pfn() which we - * don't need for MIO capable devices. + * We only support write access to MIO capable devices if we are on + * a MIO enabled system. Otherwise we would have to check for every + * address if it is a special ZPCI_ADDR and would have to do + * a get_pfn() which we don't need for MIO capable devices. Currently + * ISM devices are the only devices without MIO support and there is no + * known need for accessing these from userspace. */ if (static_branch_likely(&have_mio)) { ret = __memcpy_toio_inuser((void __iomem *) mmio_addr, @@ -282,10 +284,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, return -EINVAL; /* - * Only support write access to MIO capable devices on a MIO enabled - * system. Otherwise we would have to check for every address if it is - * a special ZPCI_ADDR and we would have to do a get_pfn() which we - * don't need for MIO capable devices. + * We only support read access to MIO capable devices if we are on + * a MIO enabled system. Otherwise we would have to check for every + * address if it is a special ZPCI_ADDR and would have to do + * a get_pfn() which we don't need for MIO capable devices. Currently + * ISM devices are the only devices without MIO support and there is no + * known need for accessing these from userspace. 
*/ if (static_branch_likely(&have_mio)) { ret = __memcpy_fromio_inuser( -- cgit v1.2.3 From 73d6eb48d26930f0cbdc8bf1ccb0ad964e7d2b90 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 22 Jul 2020 23:58:54 +0200 Subject: s390: enable HAVE_FUNCTION_ERROR_INJECTION This kernel feature is required for enabling BPF_KPROBE_OVERRIDE. Define override_function_with_return() and regs_set_return_value() functions, and fix compile errors in syscall_wrapper.h. Signed-off-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + arch/s390/include/asm/ptrace.h | 5 +++++ arch/s390/include/asm/syscall_wrapper.h | 6 +++--- arch/s390/lib/Makefile | 2 ++ arch/s390/lib/error-inject.c | 14 ++++++++++++++ 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 arch/s390/lib/error-inject.c (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d95d323cf213..9cfd8de907cb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -145,6 +145,7 @@ config S390 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index f009a13afe71..16b3e4396312 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -184,5 +184,10 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) return regs->gprs[15]; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gprs[2] = rc; +} + #endif /* __ASSEMBLY__ */ #endif /* _S390_PTRACE_H */ diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index 3c3d6fe8e2f0..1320f4213d80 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -30,7 +30,7 @@ }) #define __S390_SYS_STUBx(x, name, ...) 
\ - asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ + asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ { \ @@ -46,7 +46,7 @@ #define COMPAT_SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long __s390_compat_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \ asmlinkage long __s390_compat_sys_##sname(void) #define SYSCALL_DEFINE0(sname) \ @@ -72,7 +72,7 @@ asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ - ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ + ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 28fd66d558ff..678333936f78 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -14,3 +14,5 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls + +lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c new file mode 100644 index 000000000000..8c9d4da87eef --- /dev/null +++ b/arch/s390/lib/error-inject.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include + +void override_function_with_return(struct pt_regs *regs) +{ + /* + * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some + * kernel function. 
+ */ + regs->psw.addr = regs->gprs[14]; +} +NOKPROBE_SYMBOL(override_function_with_return); -- cgit v1.2.3 From 8398b226b8f01df902450658a139ee01d9f4c482 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:50 +0200 Subject: s390/vmem: rename vmem_add_mem() to vmem_add_range() Let's match the name to vmem_remove_range(). Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-2-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3b9e71654c37..66c5333020ea 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -57,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_mem(unsigned long start, unsigned long size) +static int vmem_add_range(unsigned long start, unsigned long size) { unsigned long pgt_prot, sgt_prot, r3_prot; unsigned long pages4k, pages1m, pages2g; @@ -308,7 +308,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) return -ERANGE; mutex_lock(&vmem_mutex); - ret = vmem_add_mem(start, size); + ret = vmem_add_range(start, size); if (ret) vmem_remove_range(start, size); mutex_unlock(&vmem_mutex); @@ -325,7 +325,7 @@ void __init vmem_map_init(void) struct memblock_region *reg; for_each_memblock(memory, reg) - vmem_add_mem(reg->base, reg->size); + vmem_add_range(reg->base, reg->size); __set_memory((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); -- cgit v1.2.3 From 3e0d3e408e63839625b210e5eb7269c45b870a38 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:51 +0200 Subject: s390/vmem: consolidate vmem_add_range() and vmem_remove_range() We want to have only a single pagetable walker and reuse the same functionality for vmemmap handling. 
Let's start by consolidating vmem_add_range() and vmem_remove_range(), converting them into a single recursive implementation. A recursive implementation makes it easier to expand individual cases without harming readability. In addition, we minimize traversing the whole hierarchy over and over again. One change is that we don't unmap large PMDs/PUDs when they are not completely covered by the request. This should never happen with direct mappings, unless a range were removed at a different granularity than it was added, which would already be broken. Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-3-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 317 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 198 insertions(+), 119 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 66c5333020ea..177daf389d39 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -54,148 +54,227 @@ pte_t __ref *vmem_pte_alloc(void) return pte; } -/* - * Add a physical memory range to the 1:1 mapping.
- */ -static int vmem_add_range(unsigned long start, unsigned long size) +static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, + bool add) { - unsigned long pgt_prot, sgt_prot, r3_prot; - unsigned long pages4k, pages1m, pages2g; - unsigned long end = start + size; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - int ret = -ENOMEM; + unsigned long prot, pages = 0; + pte_t *pte; - pgt_prot = pgprot_val(PAGE_KERNEL); - sgt_prot = pgprot_val(SEGMENT_KERNEL); - r3_prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) { - pgt_prot &= ~_PAGE_NOEXEC; - sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; - r3_prot &= ~_REGION_ENTRY_NOEXEC; + prot = pgprot_val(PAGE_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_PAGE_NOEXEC; + + pte = pte_offset_kernel(pmd, addr); + for (; addr < end; addr += PAGE_SIZE, pte++) { + if (!add) { + if (pte_none(*pte)) + continue; + pte_clear(&init_mm, addr, pte); + } else if (pte_none(*pte)) { + pte_val(*pte) = addr | prot; + } else + continue; + + pages++; } - pages4k = pages1m = pages2g = 0; - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4_dir) - goto out; - pgd_populate(&init_mm, pg_dir, p4_dir); - } - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pu_dir) + + update_page_count(PG_DIRECT_MAP_4K, add ? 
pages : -pages); +} + +static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, + bool add) +{ + unsigned long next, prot, pages = 0; + int ret = -ENOMEM; + pmd_t *pmd; + pte_t *pte; + + prot = pgprot_val(SEGMENT_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_SEGMENT_ENTRY_NOEXEC; + + pmd = pmd_offset(pud, addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!add) { + if (pmd_none(*pmd)) + continue; + if (pmd_large(*pmd) && !add) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE)) { + pmd_clear(pmd); + pages++; + } + continue; + } + } else if (pmd_none(*pmd)) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE) && + MACHINE_HAS_EDAT1 && addr && + !debug_pagealloc_enabled()) { + pmd_val(*pmd) = addr | prot; + pages++; + continue; + } + pte = vmem_pte_alloc(); + if (!pte) goto out; - p4d_populate(&init_mm, p4_dir, pu_dir); - } - pu_dir = pud_offset(p4_dir, address); - if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && - !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && - !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = address | r3_prot; - address += PUD_SIZE; - pages2g++; + pmd_populate(&init_mm, pmd, pte); + } else if (pmd_large(*pmd)) continue; - } - if (pud_none(*pu_dir)) { - pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pm_dir) + + modify_pte_table(pmd, addr, next, add); + } + ret = 0; +out: + update_page_count(PG_DIRECT_MAP_1M, add ? 
pages : -pages); + return ret; +} + +static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, + bool add) +{ + unsigned long next, prot, pages = 0; + int ret = -ENOMEM; + pud_t *pud; + pmd_t *pmd; + + prot = pgprot_val(REGION3_KERNEL); + if (!MACHINE_HAS_NX) + prot &= ~_REGION_ENTRY_NOEXEC; + + pud = pud_offset(p4d, addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + + if (!add) { + if (pud_none(*pud)) + continue; + if (pud_large(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + pud_clear(pud); + pages++; + } + continue; + } + } else if (pud_none(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE) && + MACHINE_HAS_EDAT2 && addr && + !debug_pagealloc_enabled()) { + pud_val(*pud) = addr | prot; + pages++; + continue; + } + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!pmd) goto out; - pud_populate(&init_mm, pu_dir, pm_dir); - } - pm_dir = pmd_offset(pu_dir, address); - if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && - !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && - !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = address | sgt_prot; - address += PMD_SIZE; - pages1m++; + pud_populate(&init_mm, pud, pmd); + } else if (pud_large(*pud)) continue; + + ret = modify_pmd_table(pud, addr, next, add); + if (ret) + goto out; + } + ret = 0; +out: + update_page_count(PG_DIRECT_MAP_2G, add ? 
pages : -pages); + return ret; +} + +static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, + bool add) +{ + unsigned long next; + int ret = -ENOMEM; + p4d_t *p4d; + pud_t *pud; + + p4d = p4d_offset(pgd, addr); + for (; addr < end; addr = next, p4d++) { + next = p4d_addr_end(addr, end); + + if (!add) { + if (p4d_none(*p4d)) + continue; + } else if (p4d_none(*p4d)) { + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!pud) + goto out; } - if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(); - if (!pt_dir) + + ret = modify_pud_table(p4d, addr, next, add); + if (ret) + goto out; + } + ret = 0; +out: + return ret; +} + +static int modify_pagetable(unsigned long start, unsigned long end, bool add) +{ + unsigned long addr, next; + int ret = -ENOMEM; + pgd_t *pgd; + p4d_t *p4d; + + if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) + return -EINVAL; + + for (addr = start; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + pgd = pgd_offset_k(addr); + + if (!add) { + if (pgd_none(*pgd)) + continue; + } else if (pgd_none(*pgd)) { + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!p4d) goto out; - pmd_populate(&init_mm, pm_dir, pt_dir); + pgd_populate(&init_mm, pgd, p4d); } - pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = address | pgt_prot; - address += PAGE_SIZE; - pages4k++; + ret = modify_p4d_table(pgd, addr, next, add); + if (ret) + goto out; } ret = 0; out: - update_page_count(PG_DIRECT_MAP_4K, pages4k); - update_page_count(PG_DIRECT_MAP_1M, pages1m); - update_page_count(PG_DIRECT_MAP_2G, pages2g); + if (!add) + flush_tlb_kernel_range(start, end); return ret; } +static int add_pagetable(unsigned long start, unsigned long end) +{ + return modify_pagetable(start, end, true); +} + +static int remove_pagetable(unsigned long start, unsigned long end) +{ + return modify_pagetable(start, end, false); +} + +/* + * Add a physical memory range to the 1:1 mapping. 
+ */ +static int vmem_add_range(unsigned long start, unsigned long size) +{ + return add_pagetable(start, start + size); +} + /* * Remove a physical memory range from the 1:1 mapping. * Currently only invalidates page table entries. */ static void vmem_remove_range(unsigned long start, unsigned long size) { - unsigned long pages4k, pages1m, pages2g; - unsigned long end = start + size; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - - pages4k = pages1m = pages2g = 0; - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - address += PGDIR_SIZE; - continue; - } - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - address += P4D_SIZE; - continue; - } - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - address += PUD_SIZE; - continue; - } - if (pud_large(*pu_dir)) { - pud_clear(pu_dir); - address += PUD_SIZE; - pages2g++; - continue; - } - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - address += PMD_SIZE; - continue; - } - if (pmd_large(*pm_dir)) { - pmd_clear(pm_dir); - address += PMD_SIZE; - pages1m++; - continue; - } - pt_dir = pte_offset_kernel(pm_dir, address); - pte_clear(&init_mm, address, pt_dir); - address += PAGE_SIZE; - pages4k++; - } - flush_tlb_kernel_range(start, end); - update_page_count(PG_DIRECT_MAP_4K, -pages4k); - update_page_count(PG_DIRECT_MAP_1M, -pages1m); - update_page_count(PG_DIRECT_MAP_2G, -pages2g); + remove_pagetable(start, start + size); } /* -- cgit v1.2.3 From 9ec8fa8dc331be6b63726be696b2b21d0031a09b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:52 +0200 Subject: s390/vmemmap: extend modify_pagetable() to handle vmemmap Extend our shiny new modify_pagetable() to handle !direct (vmemmap) mappings. Convert vmemmap_populate() and implement vmemmap_free(). 
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-4-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 181 ++++++++++++++++++++++------------------------------ 1 file changed, 76 insertions(+), 105 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 177daf389d39..43fe1e2eb90e 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -29,6 +29,15 @@ static void __ref *vmem_alloc_pages(unsigned int order) return (void *) memblock_phys_alloc(size, size); } +static void vmem_free_pages(unsigned long addr, int order) +{ + /* We don't expect boot memory to be removed ever. */ + if (!slab_is_available() || + WARN_ON_ONCE(PageReserved(phys_to_page(addr)))) + return; + free_pages(addr, order); +} + void *vmem_crst_alloc(unsigned long val) { unsigned long *table; @@ -54,10 +63,12 @@ pte_t __ref *vmem_pte_alloc(void) return pte; } -static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, - bool add) +/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ +static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, + unsigned long end, bool add, bool direct) { unsigned long prot, pages = 0; + int ret = -ENOMEM; pte_t *pte; prot = pgprot_val(PAGE_KERNEL); @@ -69,20 +80,34 @@ static void modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, if (!add) { if (pte_none(*pte)) continue; + if (!direct) + vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0); pte_clear(&init_mm, addr, pte); } else if (pte_none(*pte)) { - pte_val(*pte) = addr | prot; + if (!direct) { + void *new_page = vmemmap_alloc_block(PAGE_SIZE, + NUMA_NO_NODE); + + if (!new_page) + goto out; + pte_val(*pte) = __pa(new_page) | prot; + } else + pte_val(*pte) = addr | prot; } else continue; pages++; } - - update_page_count(PG_DIRECT_MAP_4K, add ? 
pages : -pages); + ret = 0; +out: + if (direct) + update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages); + return ret; } -static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, - bool add) +/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ +static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, + unsigned long end, bool add, bool direct) { unsigned long next, prot, pages = 0; int ret = -ENOMEM; @@ -103,6 +128,9 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, if (pmd_large(*pmd) && !add) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { + if (!direct) + vmem_free_pages(pmd_deref(*pmd), + get_order(PMD_SIZE)); pmd_clear(pmd); pages++; } @@ -111,11 +139,27 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, } else if (pmd_none(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE) && - MACHINE_HAS_EDAT1 && addr && + MACHINE_HAS_EDAT1 && addr && direct && !debug_pagealloc_enabled()) { pmd_val(*pmd) = addr | prot; pages++; continue; + } else if (!direct && MACHINE_HAS_EDAT1) { + void *new_page; + + /* + * Use 1MB frames for vmemmap if available. We + * always use large frames even if they are only + * partially used. Otherwise we would have also + * page tables since vmemmap_populate gets + * called for each section separately. + */ + new_page = vmemmap_alloc_block(PMD_SIZE, + NUMA_NO_NODE); + if (!new_page) + goto out; + pmd_val(*pmd) = __pa(new_page) | prot; + continue; } pte = vmem_pte_alloc(); if (!pte) @@ -124,16 +168,19 @@ static int modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, } else if (pmd_large(*pmd)) continue; - modify_pte_table(pmd, addr, next, add); + ret = modify_pte_table(pmd, addr, next, add, direct); + if (ret) + goto out; } ret = 0; out: - update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages); + if (direct) + update_page_count(PG_DIRECT_MAP_1M, add ? 
pages : -pages); return ret; } static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, - bool add) + bool add, bool direct) { unsigned long next, prot, pages = 0; int ret = -ENOMEM; @@ -162,7 +209,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_none(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE) && - MACHINE_HAS_EDAT2 && addr && + MACHINE_HAS_EDAT2 && addr && direct && !debug_pagealloc_enabled()) { pud_val(*pud) = addr | prot; pages++; @@ -175,18 +222,19 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, } else if (pud_large(*pud)) continue; - ret = modify_pmd_table(pud, addr, next, add); + ret = modify_pmd_table(pud, addr, next, add, direct); if (ret) goto out; } ret = 0; out: - update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages); + if (direct) + update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages); return ret; } static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, - bool add) + bool add, bool direct) { unsigned long next; int ret = -ENOMEM; @@ -206,7 +254,7 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, goto out; } - ret = modify_pud_table(p4d, addr, next, add); + ret = modify_pud_table(p4d, addr, next, add, direct); if (ret) goto out; } @@ -215,7 +263,8 @@ out: return ret; } -static int modify_pagetable(unsigned long start, unsigned long end, bool add) +static int modify_pagetable(unsigned long start, unsigned long end, bool add, + bool direct) { unsigned long addr, next; int ret = -ENOMEM; @@ -239,7 +288,7 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add) pgd_populate(&init_mm, pgd, p4d); } - ret = modify_p4d_table(pgd, addr, next, add); + ret = modify_p4d_table(pgd, addr, next, add, direct); if (ret) goto out; } @@ -250,14 +299,14 @@ out: return ret; } -static int add_pagetable(unsigned long start, unsigned long end) +static int 
add_pagetable(unsigned long start, unsigned long end, bool direct) { - return modify_pagetable(start, end, true); + return modify_pagetable(start, end, true, direct); } -static int remove_pagetable(unsigned long start, unsigned long end) +static int remove_pagetable(unsigned long start, unsigned long end, bool direct) { - return modify_pagetable(start, end, false); + return modify_pagetable(start, end, false, direct); } /* @@ -265,7 +314,7 @@ static int remove_pagetable(unsigned long start, unsigned long end) */ static int vmem_add_range(unsigned long start, unsigned long size) { - return add_pagetable(start, start + size); + return add_pagetable(start, start + size, true); } /* @@ -274,7 +323,7 @@ static int vmem_add_range(unsigned long start, unsigned long size) */ static void vmem_remove_range(unsigned long start, unsigned long size) { - remove_pagetable(start, start + size); + remove_pagetable(start, start + size, true); } /* @@ -283,92 +332,14 @@ static void vmem_remove_range(unsigned long start, unsigned long size) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - unsigned long pgt_prot, sgt_prot; - unsigned long address = start; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - int ret = -ENOMEM; - - pgt_prot = pgprot_val(PAGE_KERNEL); - sgt_prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) { - pgt_prot &= ~_PAGE_NOEXEC; - sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; - } - for (address = start; address < end;) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4_dir) - goto out; - pgd_populate(&init_mm, pg_dir, p4_dir); - } - - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pu_dir) - goto out; - p4d_populate(&init_mm, p4_dir, pu_dir); - } - - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - pm_dir = 
vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pm_dir) - goto out; - pud_populate(&init_mm, pu_dir, pm_dir); - } - - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - /* Use 1MB frames for vmemmap if available. We always - * use large frames even if they are only partially - * used. - * Otherwise we would have also page tables since - * vmemmap_populate gets called for each section - * separately. */ - if (MACHINE_HAS_EDAT1) { - void *new_page; - - new_page = vmemmap_alloc_block(PMD_SIZE, node); - if (!new_page) - goto out; - pmd_val(*pm_dir) = __pa(new_page) | sgt_prot; - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - pt_dir = vmem_pte_alloc(); - if (!pt_dir) - goto out; - pmd_populate(&init_mm, pm_dir, pt_dir); - } else if (pmd_large(*pm_dir)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - - pt_dir = pte_offset_kernel(pm_dir, address); - if (pte_none(*pt_dir)) { - void *new_page; - - new_page = vmemmap_alloc_block(PAGE_SIZE, node); - if (!new_page) - goto out; - pte_val(*pt_dir) = __pa(new_page) | pgt_prot; - } - address += PAGE_SIZE; - } - ret = 0; -out: - return ret; + /* We don't care about the node, just use NUMA_NO_NODE on allocations */ + return add_pagetable(start, end, false); } void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { + remove_pagetable(start, end, false); } void vmem_remove_mapping(unsigned long start, unsigned long size) -- cgit v1.2.3 From c00f05a92424c7788fdbf0909b823f8027596d66 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:53 +0200 Subject: s390/vmemmap: cleanup when vmemmap_populate() fails Cleanup what we partially added in case vmemmap_populate() fails. For vmem, this is already handled by vmem_add_mapping(). 
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-5-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 43fe1e2eb90e..be32a38bb91f 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -332,8 +332,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { + int ret; + /* We don't care about the node, just use NUMA_NO_NODE on allocations */ - return add_pagetable(start, end, false); + ret = add_pagetable(start, end, false); + if (ret) + remove_pagetable(start, end, false); + return ret; } void vmemmap_free(unsigned long start, unsigned long end, -- cgit v1.2.3 From aa18e0e65800bf3250b23914a28e0e3fd9cadec2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:54 +0200 Subject: s390/vmemmap: take the vmem_mutex when populating/freeing Let's synchronize all accesses to the 1:1 and vmemmap mappings. This will be especially relevant when wanting to cleanup empty page tables that could be shared by both. Avoid races when removing tables that might be just about to get reused. 
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-6-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index be32a38bb91f..a2b79681df69 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -334,17 +334,21 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, { int ret; + mutex_lock(&vmem_mutex); /* We don't care about the node, just use NUMA_NO_NODE on allocations */ ret = add_pagetable(start, end, false); if (ret) remove_pagetable(start, end, false); + mutex_unlock(&vmem_mutex); return ret; } void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { + mutex_lock(&vmem_mutex); remove_pagetable(start, end, false); + mutex_unlock(&vmem_mutex); } void vmem_remove_mapping(unsigned long start, unsigned long size) -- cgit v1.2.3 From b9ff81003cf1a0b12b8d60b6ef33a34e84dfe7ac Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:55 +0200 Subject: s390/vmem: cleanup empty page tables Let's cleanup empty page tables. Consider only page tables that fully fall into the identity mapping and the vmemmap range. As there are no valid accesses to vmem/vmemmap within non-populated ranges, the single tlb flush at the end should be sufficient.
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-7-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index a2b79681df69..b831f9f9130a 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -63,6 +63,15 @@ pte_t __ref *vmem_pte_alloc(void) return pte; } +static void vmem_pte_free(unsigned long *table) +{ + /* We don't expect boot memory to be removed ever. */ + if (!slab_is_available() || + WARN_ON_ONCE(PageReserved(virt_to_page(table)))) + return; + page_table_free(&init_mm, table); +} + /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, bool add, bool direct) @@ -105,6 +114,21 @@ out: return ret; } +static void try_free_pte_table(pmd_t *pmd, unsigned long start) +{ + pte_t *pte; + int i; + + /* We can safely assume this is fully in 1:1 mapping & vmemmap area */ + pte = pte_offset_kernel(pmd, start); + for (i = 0; i < PTRS_PER_PTE; i++, pte++) + if (!pte_none(*pte)) + return; + + vmem_pte_free(__va(pmd_deref(*pmd))); + pmd_clear(pmd); +} + /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, unsigned long end, bool add, bool direct) @@ -171,6 +195,8 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, ret = modify_pte_table(pmd, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_pte_table(pmd, addr & PMD_MASK); } ret = 0; out: @@ -179,6 +205,29 @@ out: return ret; } +static void try_free_pmd_table(pud_t *pud, unsigned long start) +{ + const unsigned long end = start + PUD_SIZE; + pmd_t *pmd; + int i; + + /* Don't mess with any tables not fully in 1:1 
mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + pmd = pmd_offset(pud, start); + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) + if (!pmd_none(*pmd)) + return; + + vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER); + pud_clear(pud); +} + static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, bool add, bool direct) { @@ -225,6 +274,8 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, ret = modify_pmd_table(pud, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_pmd_table(pud, addr & PUD_MASK); } ret = 0; out: @@ -233,6 +284,29 @@ out: return ret; } +static void try_free_pud_table(p4d_t *p4d, unsigned long start) +{ + const unsigned long end = start + P4D_SIZE; + pud_t *pud; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + pud = pud_offset(p4d, start); + for (i = 0; i < PTRS_PER_PUD; i++, pud++) + if (!pud_none(*pud)) + return; + + vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER); + p4d_clear(p4d); +} + static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, bool add, bool direct) { @@ -257,12 +331,37 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, ret = modify_pud_table(p4d, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_pud_table(p4d, addr & P4D_MASK); } ret = 0; out: return ret; } +static void try_free_p4d_table(pgd_t *pgd, unsigned long start) +{ + const unsigned long end = start + PGDIR_SIZE; + p4d_t *p4d; + int i; + + /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */ + if (end > VMALLOC_START) + return; +#ifdef CONFIG_KASAN + if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) + return; +#endif + + 
p4d = p4d_offset(pgd, start); + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) + if (!p4d_none(*p4d)) + return; + + vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER); + pgd_clear(pgd); +} + static int modify_pagetable(unsigned long start, unsigned long end, bool add, bool direct) { @@ -291,6 +390,8 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, ret = modify_p4d_table(pgd, addr, next, add, direct); if (ret) goto out; + if (!add) + try_free_p4d_table(pgd, addr & PGDIR_MASK); } ret = 0; out: @@ -319,7 +420,6 @@ static int vmem_add_range(unsigned long start, unsigned long size) /* * Remove a physical memory range from the 1:1 mapping. - * Currently only invalidates page table entries. */ static void vmem_remove_range(unsigned long start, unsigned long size) { -- cgit v1.2.3 From f2057b4266a6be469ea0630971cf3cd933e42cce Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:56 +0200 Subject: s390/vmemmap: fallback to PTEs if mapping large PMD fails Let's fallback to single pages if short on huge pages. No need to stop memory hotplug. 
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-8-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b831f9f9130a..e82a63de19db 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -180,10 +180,10 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, */ new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); - if (!new_page) - goto out; - pmd_val(*pmd) = __pa(new_page) | prot; - continue; + if (new_page) { + pmd_val(*pmd) = __pa(new_page) | prot; + continue; + } } pte = vmem_pte_alloc(); if (!pte) -- cgit v1.2.3 From cd5781d63eaf6dbf89532d8c7c214786b767ee16 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:57 +0200 Subject: s390/vmemmap: remember unused sub-pmd ranges With a memmap size of 56 bytes or 72 bytes per page, the memmap for a 256 MB section won't span full PMDs. As we populate single sections and depopulate single sections, the depopulation step would not be able to free all vmemmap pmds anymore. Do it similarly to x86, marking the unused memmap ranges in a special way (pad it with 0xFD). This allows us to add/remove sections, cleaning up all allocated vmemmap pages even if the memmap size is not multiple of 16 bytes per page. A 56 byte memmap can, for example, be created with !CONFIG_MEMCG and !CONFIG_SLUB. 
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-9-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index e82a63de19db..df361bbacda1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -72,6 +72,42 @@ static void vmem_pte_free(unsigned long *table) page_table_free(&init_mm, table); } +#define PAGE_UNUSED 0xFD + +static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +{ + /* + * As we expect to add in the same granularity as we remove, it's + * sufficient to mark only some piece used to block the memmap page from + * getting removed (just in case the memmap never gets initialized, + * e.g., because the memory block never gets onlined). + */ + memset(__va(start), 0, sizeof(struct page)); +} + +static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) +{ + void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + + /* Could be our memmap page is filled with PAGE_UNUSED already ... */ + vmemmap_use_sub_pmd(start, end); + + /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ + if (!IS_ALIGNED(start, PMD_SIZE)) + memset(page, PAGE_UNUSED, start - __pa(page)); + if (!IS_ALIGNED(end, PMD_SIZE)) + memset(__va(end), PAGE_UNUSED, __pa(page) + PMD_SIZE - end); +} + +/* Returns true if the PMD is completely unused and can be freed. 
*/ +static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) +{ + void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + + memset(__va(start), PAGE_UNUSED, end - start); + return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); +} + /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, unsigned long end, bool add, bool direct) @@ -157,6 +193,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, get_order(PMD_SIZE)); pmd_clear(pmd); pages++; + } else if (!direct && + vmemmap_unuse_sub_pmd(addr, next)) { + vmem_free_pages(pmd_deref(*pmd), + get_order(PMD_SIZE)); + pmd_clear(pmd); } continue; } @@ -182,6 +223,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, NUMA_NO_NODE); if (new_page) { pmd_val(*pmd) = __pa(new_page) | prot; + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + vmemmap_use_new_sub_pmd(addr, + next); + } continue; } } @@ -189,8 +235,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (!pte) goto out; pmd_populate(&init_mm, pmd, pte); - } else if (pmd_large(*pmd)) + } else if (pmd_large(*pmd)) { + if (!direct) + vmemmap_use_sub_pmd(addr, next); continue; + } ret = modify_pte_table(pmd, addr, next, add, direct); if (ret) -- cgit v1.2.3 From 2c114df071935762ffa88144cdab03d84beaa702 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 Jul 2020 11:45:58 +0200 Subject: s390/vmemmap: avoid memset(PAGE_UNUSED) when adding consecutive sections Let's avoid memset(PAGE_UNUSED) when adding consecutive sections, whereby the vmemmap of a single section does not span full PMDs. 
Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Gerald Schaefer Signed-off-by: David Hildenbrand Message-Id: <20200722094558.9828-10-david@redhat.com> Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index df361bbacda1..70ebfc7958a6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -74,7 +74,22 @@ static void vmem_pte_free(unsigned long *table) #define PAGE_UNUSED 0xFD -static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +/* + * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges + * from unused_pmd_start to next PMD_SIZE boundary. + */ +static unsigned long unused_pmd_start; + +static void vmemmap_flush_unused_pmd(void) +{ + if (!unused_pmd_start) + return; + memset(__va(unused_pmd_start), PAGE_UNUSED, + ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start); + unused_pmd_start = 0; +} + +static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end) { /* * As we expect to add in the same granularity as we remove, it's @@ -85,18 +100,41 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) memset(__va(start), 0, sizeof(struct page)); } +static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end) +{ + /* + * We only optimize if the new used range directly follows the + * previously unused range (esp., when populating consecutive sections). + */ + if (unused_pmd_start == start) { + unused_pmd_start = end; + if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE))) + unused_pmd_start = 0; + return; + } + vmemmap_flush_unused_pmd(); + __vmemmap_use_sub_pmd(start, end); +} + static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + vmemmap_flush_unused_pmd(); + /* Could be our memmap page is filled with PAGE_UNUSED already ... 
*/ - vmemmap_use_sub_pmd(start, end); + __vmemmap_use_sub_pmd(start, end); /* Mark the unused parts of the new memmap page PAGE_UNUSED. */ if (!IS_ALIGNED(start, PMD_SIZE)) memset(page, PAGE_UNUSED, start - __pa(page)); + /* + * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of + * consecutive sections. Remember for the last added PMD the last + * unused range in the populated PMD. + */ if (!IS_ALIGNED(end, PMD_SIZE)) - memset(__va(end), PAGE_UNUSED, __pa(page) + PMD_SIZE - end); + unused_pmd_start = end; } /* Returns true if the PMD is completely unused and can be freed. */ @@ -104,6 +142,7 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end) { void *page = __va(ALIGN_DOWN(start, PMD_SIZE)); + vmemmap_flush_unused_pmd(); memset(__va(start), PAGE_UNUSED, end - start); return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE); } -- cgit v1.2.3 From 9a996c67a65d937b23408e56935ef23404c9418e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 23 Jul 2020 21:42:36 +0200 Subject: s390/vmemmap: coding style updates Signed-off-by: Heiko Carstens --- arch/s390/mm/vmem.c | 55 +++++++++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 70ebfc7958a6..1aed1a4dfc2d 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -169,17 +169,17 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, pte_clear(&init_mm, addr, pte); } else if (pte_none(*pte)) { if (!direct) { - void *new_page = vmemmap_alloc_block(PAGE_SIZE, - NUMA_NO_NODE); + void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); if (!new_page) goto out; pte_val(*pte) = __pa(new_page) | prot; - } else + } else { pte_val(*pte) = addr | prot; - } else + } + } else { continue; - + } pages++; } ret = 0; @@ -196,10 +196,10 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start) /* We can safely assume this is fully in 1:1 mapping 
& vmemmap area */ pte = pte_offset_kernel(pmd, start); - for (i = 0; i < PTRS_PER_PTE; i++, pte++) + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { if (!pte_none(*pte)) return; - + } vmem_pte_free(__va(pmd_deref(*pmd))); pmd_clear(pmd); } @@ -220,7 +220,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, pmd = pmd_offset(pud, addr); for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); - if (!add) { if (pmd_none(*pmd)) continue; @@ -228,14 +227,11 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) - vmem_free_pages(pmd_deref(*pmd), - get_order(PMD_SIZE)); + vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); pmd_clear(pmd); pages++; - } else if (!direct && - vmemmap_unuse_sub_pmd(addr, next)) { - vmem_free_pages(pmd_deref(*pmd), - get_order(PMD_SIZE)); + } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) { + vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE)); pmd_clear(pmd); } continue; @@ -258,14 +254,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, * page tables since vmemmap_populate gets * called for each section separately. 
*/ - new_page = vmemmap_alloc_block(PMD_SIZE, - NUMA_NO_NODE); + new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE); if (new_page) { pmd_val(*pmd) = __pa(new_page) | prot; if (!IS_ALIGNED(addr, PMD_SIZE) || !IS_ALIGNED(next, PMD_SIZE)) { - vmemmap_use_new_sub_pmd(addr, - next); + vmemmap_use_new_sub_pmd(addr, next); } continue; } @@ -279,7 +273,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, vmemmap_use_sub_pmd(addr, next); continue; } - ret = modify_pte_table(pmd, addr, next, add, direct); if (ret) goto out; @@ -306,12 +299,10 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start) if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end) return; #endif - pmd = pmd_offset(pud, start); for (i = 0; i < PTRS_PER_PMD; i++, pmd++) if (!pmd_none(*pmd)) return; - vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER); pud_clear(pud); } @@ -327,11 +318,9 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, prot = pgprot_val(REGION3_KERNEL); if (!MACHINE_HAS_NX) prot &= ~_REGION_ENTRY_NOEXEC; - pud = pud_offset(p4d, addr); for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); - if (!add) { if (pud_none(*pud)) continue; @@ -356,9 +345,9 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, if (!pmd) goto out; pud_populate(&init_mm, pud, pmd); - } else if (pud_large(*pud)) + } else if (pud_large(*pud)) { continue; - + } ret = modify_pmd_table(pud, addr, next, add, direct); if (ret) goto out; @@ -387,10 +376,10 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start) #endif pud = pud_offset(p4d, start); - for (i = 0; i < PTRS_PER_PUD; i++, pud++) + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { if (!pud_none(*pud)) return; - + } vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER); p4d_clear(p4d); } @@ -406,7 +395,6 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, p4d = p4d_offset(pgd, addr); for (; addr < end; addr = next, p4d++) 
{ next = p4d_addr_end(addr, end); - if (!add) { if (p4d_none(*p4d)) continue; @@ -415,7 +403,6 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end, if (!pud) goto out; } - ret = modify_pud_table(p4d, addr, next, add, direct); if (ret) goto out; @@ -442,10 +429,10 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start) #endif p4d = p4d_offset(pgd, start); - for (i = 0; i < PTRS_PER_P4D; i++, p4d++) + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { if (!p4d_none(*p4d)) return; - + } vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER); pgd_clear(pgd); } @@ -460,7 +447,6 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end))) return -EINVAL; - for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); @@ -474,7 +460,6 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add, goto out; pgd_populate(&init_mm, pgd, p4d); } - ret = modify_p4d_table(pgd, addr, next, add, direct); if (ret) goto out; @@ -518,7 +503,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) * Add a backed mem_map array to the virtual mem_map array. */ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) + struct vmem_altmap *altmap) { int ret; @@ -532,7 +517,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, } void vmemmap_free(unsigned long start, unsigned long end, - struct vmem_altmap *altmap) + struct vmem_altmap *altmap) { mutex_lock(&vmem_mutex); remove_pagetable(start, end, false); -- cgit v1.2.3