From bc2c6f5695ffa05c838b8b6fc5cd581a672151a1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 24 Jan 2023 19:38:04 -0800 Subject: powerpc/module_64: Improve restore_r2() return semantics restore_r2() returns 1 on success, which is surprising for a non-boolean function. Change it to return 0 on success and -errno on error to match kernel coding convention. Signed-off-by: Josh Poimboeuf Reviewed-by: Petr Mladek Acked-by: Song Liu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/15baf76c271a0ae09f7b8556e50f2b4251e7049d.1674617130.git.jpoimboe@kernel.org --- arch/powerpc/kernel/module_64.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ff045644f13f..85aebad6470f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -504,7 +504,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) u32 *prev_insn = instruction - 1; if (is_mprofile_ftrace_call(name)) - return 1; + return 0; /* * Make sure the branch isn't a sibling call. Sibling calls aren't @@ -512,19 +512,16 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) * restore afterwards. */ if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) - return 1; + return 0; if (*instruction != PPC_RAW_NOP()) { pr_err("%s: Expected nop after call, got %08x at %pS\n", me->name, *instruction, instruction); - return 0; + return -ENOEXEC; } /* ld r2,R2_STACK_OFFSET(r1) */ - if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) - return 0; - - return 1; + return patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC)); } int apply_relocate_add(Elf64_Shdr *sechdrs, @@ -648,8 +645,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, strtab + sym->st_name); if (!value) return -ENOENT; - if (!restore_r2(strtab + sym->st_name, - (u32 *)location + 1, me)) + if (restore_r2(strtab + sym->st_name, + (u32 *)location + 1, me)) return -ENOEXEC; } else value += local_entry_offset(sym); -- cgit v1.2.3 From 37251c7114e1b743b077ca74b93557c1ad92a97e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 24 Jan 2023 19:38:05 -0800 Subject: powerpc/module_64: Fix "expected nop" error on module re-patching When a module with a livepatched function is unloaded and then reloaded, klp attempts to dynamically re-patch it. On ppc64, that fails with the following error: module_64: livepatch_nfsd: Expected nop after call, got e8410018 at e_show+0x60/0x548 [livepatch_nfsd] livepatch: failed to initialize patch 'livepatch_nfsd' for module 'nfsd' (-8) livepatch: patch 'livepatch_nfsd' failed for module 'nfsd', refusing to load module 'nfsd' The error happens because the restore r2 instruction had already previously been written into the klp module's replacement function when the original function was patched the first time. So the instruction wasn't a nop as expected. When the restore r2 instruction has already been patched in, detect that and skip the warning and the instruction write. 
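For context, a minimal sketch of the call-site layout this code manages (illustrative, not taken from the patch itself):

    bl      <module stub>                  # relative link branch into the stub
    nop                                    # patched by restore_r2() into:
    ld      r2,R2_STACK_OFFSET(r1)         # reload the caller's TOC pointer

On the first load of the livepatch module the slot after the branch is still a nop and gets patched; on a re-load it already holds the ld, which is exactly the case the change below treats as success instead of warning.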
Signed-off-by: Josh Poimboeuf Reviewed-by: Petr Mladek Acked-by: Song Liu Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2f6329ffd9674df6ff57e03edeb2ca54414770ab.1674617130.git.jpoimboe@kernel.org --- arch/powerpc/kernel/module_64.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 85aebad6470f..2ac78d207f77 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -502,6 +502,7 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; + u32 insn_val = *instruction; if (is_mprofile_ftrace_call(name)) return 0; @@ -514,9 +515,18 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 0; - if (*instruction != PPC_RAW_NOP()) { + /* + * For livepatch, the restore r2 instruction might have already been + * written previously, if the referenced symbol is in a previously + * unloaded module which is now being loaded again. In that case, skip + * the warning and the instruction write. + */ + if (insn_val == PPC_INST_LD_TOC) + return 0; + + if (insn_val != PPC_RAW_NOP()) { pr_err("%s: Expected nop after call, got %08x at %pS\n", - me->name, *instruction, instruction); + me->name, insn_val, instruction); return -ENOEXEC; } -- cgit v1.2.3 From 45f7091aac3546ef8112bf62836650ca0bbf0b79 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Jan 2023 08:38:59 +0100 Subject: powerpc/64: Set default CPU in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 0069f3d14e7a ("powerpc/64e: Tie PPC_BOOK3E_64 to PPC_E500MC"), the only possible BOOK3E/64 are E500, so no need of a default CPU over the E5500. When the user selects book3e, they must have an e500 compatible compiler, and it won't work anymore with the default -mcpu=power64, see commit d6b551b8f90c ("powerpc/64e: Fix build failure with GCC 12 (unrecognized opcode: `wrteei')"). For book3s/64, replace GENERIC_CPU by POWERPC64_CPU to match the PPC32 POWERPC_CPU, and set a default mpcu value in Kconfig directly. When a user selects a particular CPU, they must ensure the compiler has the requested capability. Therefore, remove hidden fallback, instead offer user the possibility to say they want to use the toolchain default. 
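As a worked example of the new flow (values taken from the hunks below): with CONFIG_POWERPC64_CPU=y on a little-endian book3s/64 configuration, CONFIG_TARGET_CPU resolves to "power8" and the Makefile now passes it straight through, with only the -mtune kept behind cc-option:

    CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)        # -> -mcpu=power8
    CFLAGS-$(CONFIG_POWERPC64_CPU)   += $(call cc-option,-mtune=power10, ...)

Selecting CONFIG_TOOLCHAIN_DEFAULT_CPU instead leaves TARGET_CPU_BOOL unset, so no -mcpu is added at all and the compiler's built-in default is used.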
Fixes: d6b551b8f90c ("powerpc/64e: Fix build failure with GCC 12 (unrecognized opcode: `wrteei')") Reported-by: Pali Rohár Tested-by: Pali Rohár Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/76c11197b058193dcb8e8b26adffba09cfbdab11.1674632329.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 22 +++++----------------- arch/powerpc/platforms/Kconfig.cputype | 12 +++++++----- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index dc4cbf0a5ca9..bf5f0a998273 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -146,19 +146,6 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD)) CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) -ifdef CONFIG_PPC_BOOK3S_64 -ifdef CONFIG_CPU_LITTLE_ENDIAN -CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8 -else -CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4 -endif -CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power10, \ - $(call cc-option,-mtune=power9, \ - $(call cc-option,-mtune=power8))) -else ifdef CONFIG_PPC_BOOK3E_64 -CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 -endif - ifdef CONFIG_FUNCTION_TRACER CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL @@ -166,11 +153,12 @@ CC_FLAGS_FTRACE += -mprofile-kernel endif endif -CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) -AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) +CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU) +AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU) -CFLAGS-$(CONFIG_E5500_CPU) += $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) -CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) +CFLAGS-$(CONFIG_POWERPC64_CPU) += $(call cc-option,-mtune=power10, \ + $(call cc-option,-mtune=power9, \ + $(call cc-option,-mtune=power8))) asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9563336e3348..31cea2eeb59e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -118,19 +118,18 @@ endchoice choice prompt "CPU selection" - default GENERIC_CPU help This will create a kernel which is optimised for a particular CPU. The resulting kernel may not run on other CPUs, so use this with care. If unsure, select Generic. 
-config GENERIC_CPU +config POWERPC64_CPU bool "Generic (POWER5 and PowerPC 970 and above)" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN select PPC_64S_HASH_MMU -config GENERIC_CPU +config POWERPC64_CPU bool "Generic (POWER8 and above)" depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER @@ -233,13 +232,12 @@ config E500MC_CPU config TOOLCHAIN_DEFAULT_CPU bool "Rely on the toolchain's implicit default CPU" - depends on PPC32 endchoice config TARGET_CPU_BOOL bool - default !GENERIC_CPU && !TOOLCHAIN_DEFAULT_CPU + default !TOOLCHAIN_DEFAULT_CPU config TARGET_CPU string @@ -251,6 +249,10 @@ config TARGET_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU default "power10" if POWER10_CPU + default "e500mc64" if E5500_CPU + default "e6500" if E6500_CPU + default "power4" if POWERPC64_CPU && !CPU_LITTLE_ENDIAN + default "power8" if POWERPC64_CPU && CPU_LITTLE_ENDIAN default "405" if 405_CPU default "440" if 440_CPU default "464" if 464_CPU -- cgit v1.2.3 From ff7c76f66d8bad4e694c264c789249e1d3a8205d Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Wed, 25 Jan 2023 08:39:00 +0100 Subject: powerpc/boot: Don't always pass -mcpu=powerpc when building 32-bit uImage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_TARGET_CPU is specified then pass its value to the compiler -mcpu option. This fixes following build error when building kernel with powerpc e500 SPE capable cross compilers: BOOTAS arch/powerpc/boot/crt0.o powerpc-linux-gnuspe-gcc: error: unrecognized argument in option ‘-mcpu=powerpc’ powerpc-linux-gnuspe-gcc: note: valid arguments to ‘-mcpu=’ are: 8540 8548 native make[1]: *** [arch/powerpc/boot/Makefile:231: arch/powerpc/boot/crt0.o] Error 1 Similar change was already introduced for the main powerpc Makefile in commit 446cda1b21d9 ("powerpc/32: Don't always pass -mcpu=powerpc to the compiler"). Fixes: 40a75584e526 ("powerpc/boot: Build wrapper for an appropriate CPU") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Pali Rohár Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2ae3ae5887babfdacc34435bff0944b3f336100a.1674632329.git.christophe.leroy@csgroup.eu --- arch/powerpc/boot/Makefile | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index d32d95aea5d6..295f76df13b5 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -39,13 +39,19 @@ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ $(LINUXINCLUDE) ifdef CONFIG_PPC64_BOOT_WRAPPER -ifdef CONFIG_CPU_LITTLE_ENDIAN -BOOTCFLAGS += -m64 -mcpu=powerpc64le +BOOTCFLAGS += -m64 else -BOOTCFLAGS += -m64 -mcpu=powerpc64 +BOOTCFLAGS += -m32 endif + +ifdef CONFIG_TARGET_CPU_BOOL +BOOTCFLAGS += -mcpu=$(CONFIG_TARGET_CPU) +else ifdef CONFIG_PPC64_BOOT_WRAPPER +ifdef CONFIG_CPU_LITTLE_ENDIAN +BOOTCFLAGS += -mcpu=powerpc64le else -BOOTCFLAGS += -m32 -mcpu=powerpc +BOOTCFLAGS += -mcpu=powerpc64 +endif endif BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) -- cgit v1.2.3 From 5ff92e2f274dc42a9e534473121273cd209d3501 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 24 Jan 2023 08:04:45 -0600 Subject: powerpc/rtas: unexport 'rtas' symbol No modular code needs access to the 'rtas' struct, so remove the symbol export. 
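For clarity (illustrative, not part of the patch): the structure itself remains available to built-in code, which keeps using it directly, e.g.

    tokp = of_get_property(rtas.dev, service, NULL);    /* rtas_token() */

Only the EXPORT_SYMBOL(rtas) line goes away, so modules can no longer reference the struct.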
Suggested-by: Michael Ellerman Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230124140448.45938-2-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index deded51a7978..6c5716b19d69 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -63,7 +63,6 @@ static inline void do_enter_rtas(unsigned long args) struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; -EXPORT_SYMBOL(rtas); DEFINE_SPINLOCK(rtas_data_buf_lock); EXPORT_SYMBOL(rtas_data_buf_lock); -- cgit v1.2.3 From 0d7e812fd282bf248b54523cc550a34b77c2e9a2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 26 Jan 2023 21:49:00 +1100 Subject: powerpc/rtas: Drop unused export symbols Some RTAS symbols are never used by modular code, drop their exports. Signed-off-by: Michael Ellerman Reviewed-by: Nathan Lynch Link: https://lore.kernel.org/r/20230127111231.84294-1-mpe@ellerman.id.au --- arch/powerpc/kernel/rtas.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6c5716b19d69..149742119b6d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -341,7 +341,6 @@ int rtas_service_present(const char *service) { return rtas_token(service) != RTAS_UNKNOWN_SERVICE; } -EXPORT_SYMBOL(rtas_service_present); #ifdef CONFIG_RTAS_ERROR_LOGGING @@ -356,7 +355,6 @@ int rtas_get_error_log_max(void) { return rtas_error_log_max; } -EXPORT_SYMBOL(rtas_get_error_log_max); static void __init init_error_log_max(void) { @@ -622,7 +620,6 @@ unsigned int rtas_busy_delay_time(int status) return ms; } -EXPORT_SYMBOL(rtas_busy_delay_time); /** * rtas_busy_delay() - helper for RTAS busy and extended delay statuses @@ -820,7 +817,6 @@ bool rtas_indicator_present(int token, int *maxindex) return false; } -EXPORT_SYMBOL(rtas_indicator_present); int rtas_set_indicator(int indicator, int index, int new_value) { -- cgit v1.2.3 From 9bce6243848dfd0ff7c2be6e8d82ab9b1e6c7858 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 24 Jan 2023 08:04:46 -0600 Subject: powerpc/rtas: make all exports GPL The first symbol exports of RTAS functions and data came with the (now removed) scanlog driver in 2003: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/?id=f92e361842d5251e50562b09664082dcbd0548bb At the time this was applied, EXPORT_SYMBOL_GPL() was very new, and the exports of rtas_call() etc have remained non-GPL. As new APIs have been added to the RTAS subsystem, their symbol exports have followed the convention set by existing code. However, the historical evidence is that RTAS function exports have been added over time only to satisfy the needs of in-kernel users, and these clients must have fairly intimate knowledge of how the APIs work to use them safely. No out of tree users are known, and future ones seem unlikely. Arguably the default for RTAS symbols should have become EXPORT_SYMBOL_GPL once it was available. Let's make it so now, and exceptions can be evaluated as needed. 
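Illustrative consequence (a hypothetical out-of-tree example, not from the patch): a module declaring a non-GPL-compatible license can no longer bind to these symbols once they are EXPORT_SYMBOL_GPL:

    /* hypothetical external module */
    MODULE_LICENSE("Proprietary");
    ...
    rc = rtas_call(token, 0, 1, NULL);   /* now rejected: GPL-only symbol */

In-kernel users and GPL-licensed modules are unaffected.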
Signed-off-by: Nathan Lynch Reviewed-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230124140448.45938-3-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 149742119b6d..b539eab60088 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -65,10 +65,10 @@ struct rtas_t rtas = { }; DEFINE_SPINLOCK(rtas_data_buf_lock); -EXPORT_SYMBOL(rtas_data_buf_lock); +EXPORT_SYMBOL_GPL(rtas_data_buf_lock); char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; -EXPORT_SYMBOL(rtas_data_buf); +EXPORT_SYMBOL_GPL(rtas_data_buf); unsigned long rtas_rmo_buf; @@ -77,7 +77,7 @@ unsigned long rtas_rmo_buf; * This is done like this so rtas_flash can be a module. */ void (*rtas_flash_term_hook)(int); -EXPORT_SYMBOL(rtas_flash_term_hook); +EXPORT_SYMBOL_GPL(rtas_flash_term_hook); /* RTAS use home made raw locking instead of spin_lock_irqsave * because those can be called from within really nasty contexts @@ -325,7 +325,7 @@ void rtas_progress(char *s, unsigned short hex) spin_unlock(&progress_lock); } -EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ +EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { @@ -335,7 +335,7 @@ int rtas_token(const char *service) tokp = of_get_property(rtas.dev, service, NULL); return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE; } -EXPORT_SYMBOL(rtas_token); +EXPORT_SYMBOL_GPL(rtas_token); int rtas_service_present(const char *service) { @@ -582,7 +582,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) } return ret; } -EXPORT_SYMBOL(rtas_call); +EXPORT_SYMBOL_GPL(rtas_call); /** * rtas_busy_delay_time() - From an RTAS status value, calculate the @@ -693,7 +693,7 @@ bool rtas_busy_delay(int status) return ret; } -EXPORT_SYMBOL(rtas_busy_delay); +EXPORT_SYMBOL_GPL(rtas_busy_delay); static int rtas_error_rc(int rtas_rc) { @@ -738,7 +738,7 @@ int rtas_get_power_level(int powerdomain, int *level) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_power_level); +EXPORT_SYMBOL_GPL(rtas_get_power_level); int rtas_set_power_level(int powerdomain, int level, int *setlevel) { @@ -756,7 +756,7 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_power_level); +EXPORT_SYMBOL_GPL(rtas_set_power_level); int rtas_get_sensor(int sensor, int index, int *state) { @@ -774,7 +774,7 @@ int rtas_get_sensor(int sensor, int index, int *state) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_sensor); +EXPORT_SYMBOL_GPL(rtas_get_sensor); int rtas_get_sensor_fast(int sensor, int index, int *state) { @@ -834,7 +834,7 @@ int rtas_set_indicator(int indicator, int index, int new_value) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_indicator); +EXPORT_SYMBOL_GPL(rtas_set_indicator); /* * Ignoring RTAS extended delay -- cgit v1.2.3 From 599af49155467148afaf0bc3c0114bd80fd4491f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 24 Jan 2023 08:04:47 -0600 Subject: powerpc/rtas: remove lock and args fields from global rtas struct Only code internal to the RTAS subsystem needs access to the central lock and parameter block. Remove these from the globally visible 'rtas' struct and make them file-static in rtas.c. 
Some changed lines in rtas_call() lack appropriate spacing around operators and cause checkpatch errors; fix these as well. Suggested-by: Laurent Dufour Signed-off-by: Nathan Lynch Reviewed-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230124140448.45938-4-nathanl@linux.ibm.com --- arch/powerpc/include/asm/rtas-types.h | 2 -- arch/powerpc/kernel/rtas.c | 50 ++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h index 8df6235d64d1..f2ad4a96cbc5 100644 --- a/arch/powerpc/include/asm/rtas-types.h +++ b/arch/powerpc/include/asm/rtas-types.h @@ -18,8 +18,6 @@ struct rtas_t { unsigned long entry; /* physical address pointer */ unsigned long base; /* physical address pointer */ unsigned long size; - arch_spinlock_t lock; - struct rtas_args args; struct device_node *dev; /* virtual address pointer */ }; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index b539eab60088..1a1c07df6f87 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -60,9 +60,17 @@ static inline void do_enter_rtas(unsigned long args) srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ } -struct rtas_t rtas = { - .lock = __ARCH_SPIN_LOCK_UNLOCKED -}; +struct rtas_t rtas; + +/* + * Nearly all RTAS calls need to be serialized. All uses of the + * default rtas_args block must hold rtas_lock. + * + * Exceptions to the RTAS serialization requirement (e.g. stop-self) + * must use a separate rtas_args structure. + */ +static arch_spinlock_t rtas_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static struct rtas_args rtas_args; DEFINE_SPINLOCK(rtas_data_buf_lock); EXPORT_SYMBOL_GPL(rtas_data_buf_lock); @@ -90,13 +98,13 @@ static unsigned long lock_rtas(void) local_irq_save(flags); preempt_disable(); - arch_spin_lock(&rtas.lock); + arch_spin_lock(&rtas_lock); return flags; } static void unlock_rtas(unsigned long flags) { - arch_spin_unlock(&rtas.lock); + arch_spin_unlock(&rtas_lock); local_irq_restore(flags); preempt_enable(); } @@ -114,7 +122,7 @@ static void call_rtas_display_status(unsigned char c) return; s = lock_rtas(); - rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c); + rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c); unlock_rtas(s); } @@ -384,7 +392,7 @@ static int rtas_last_error_token; * most recent failed call to rtas. Because the error text * might go stale if there are any other intervening rtas calls, * this routine must be called atomically with whatever produced - * the error (i.e. with rtas.lock still held from the previous call). + * the error (i.e. with rtas_lock still held from the previous call). */ static char *__fetch_rtas_last_error(char *altbuf) { @@ -404,13 +412,13 @@ static char *__fetch_rtas_last_error(char *altbuf) err_args.args[1] = cpu_to_be32(bufsz); err_args.args[2] = 0; - save_args = rtas.args; - rtas.args = err_args; + save_args = rtas_args; + rtas_args = err_args; - do_enter_rtas(__pa(&rtas.args)); + do_enter_rtas(__pa(&rtas_args)); - err_args = rtas.args; - rtas.args = save_args; + err_args = rtas_args; + rtas_args = save_args; /* Log the error in the unlikely case that there was one. */ if (unlikely(err_args.args[2] == 0)) { @@ -532,7 +540,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) 
va_list list; int i; unsigned long s; - struct rtas_args *rtas_args; + struct rtas_args *args; char *buff_copy = NULL; int ret; @@ -557,21 +565,21 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) s = lock_rtas(); /* We use the global rtas args buffer */ - rtas_args = &rtas.args; + args = &rtas_args; va_start(list, outputs); - va_rtas_call_unlocked(rtas_args, token, nargs, nret, list); + va_rtas_call_unlocked(args, token, nargs, nret, list); va_end(list); /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ - if (be32_to_cpu(rtas_args->rets[0]) == -1) + if (be32_to_cpu(args->rets[0]) == -1) buff_copy = __fetch_rtas_last_error(NULL); if (nret > 1 && outputs != NULL) for (i = 0; i < nret-1; ++i) - outputs[i] = be32_to_cpu(rtas_args->rets[i+1]); - ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0; + outputs[i] = be32_to_cpu(args->rets[i + 1]); + ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0; unlock_rtas(s); @@ -1265,9 +1273,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) flags = lock_rtas(); - rtas.args = args; - do_enter_rtas(__pa(&rtas.args)); - args = rtas.args; + rtas_args = args; + do_enter_rtas(__pa(&rtas_args)); + args = rtas_args; /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ -- cgit v1.2.3 From 12fd66651df6c807b7b6f420ee0fd420f54991f4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 24 Jan 2023 08:04:48 -0600 Subject: powerpc/rtas: upgrade internal arch spinlocks At the time commit f97bb36f705d ("powerpc/rtas: Turn rtas lock into a raw spinlock") was written, the spinlock lockup detection code called __delay(), which will not make progress if the timebase is not advancing. Since the interprocessor timebase synchronization sequence for chrp, cell, and some now-unsupported Power models can temporarily freeze the timebase through an RTAS function (freeze-time-base), the lock that serializes most RTAS calls was converted to arch_spinlock_t to prevent kernel hangs in the lockup detection code. However, commit bc88c10d7e69 ("locking/spinlock/debug: Remove spinlock lockup detection code") removed that inconvenient property from the lock debug code several years ago. So now it should be safe to reintroduce generic locks into the RTAS support code, primarily to increase lockdep coverage. Making rtas_lock a spinlock_t would violate lock type nesting rules because it can be acquired while holding raw locks, e.g. pci_lock and irq_desc->lock. So convert it to raw_spinlock_t. There's no apparent reason not to upgrade timebase_lock as well. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230124140448.45938-5-nathanl@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 52 +++++++++++++--------------------------------- 1 file changed, 14 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1a1c07df6f87..795225d7f138 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -69,7 +69,7 @@ struct rtas_t rtas; * Exceptions to the RTAS serialization requirement (e.g. stop-self) * must use a separate rtas_args structure. 
*/ -static arch_spinlock_t rtas_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static DEFINE_RAW_SPINLOCK(rtas_lock); static struct rtas_args rtas_args; DEFINE_SPINLOCK(rtas_data_buf_lock); @@ -87,28 +87,6 @@ unsigned long rtas_rmo_buf; void (*rtas_flash_term_hook)(int); EXPORT_SYMBOL_GPL(rtas_flash_term_hook); -/* RTAS use home made raw locking instead of spin_lock_irqsave - * because those can be called from within really nasty contexts - * such as having the timebase stopped which would lockup with - * normal locks and spinlock debugging enabled - */ -static unsigned long lock_rtas(void) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - arch_spin_lock(&rtas_lock); - return flags; -} - -static void unlock_rtas(unsigned long flags) -{ - arch_spin_unlock(&rtas_lock); - local_irq_restore(flags); - preempt_enable(); -} - /* * call_rtas_display_status and call_rtas_display_status_delay * are designed only for very early low-level debugging, which @@ -116,14 +94,14 @@ static void unlock_rtas(unsigned long flags) */ static void call_rtas_display_status(unsigned char c) { - unsigned long s; + unsigned long flags; if (!rtas.base) return; - s = lock_rtas(); + raw_spin_lock_irqsave(&rtas_lock, flags); rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c); - unlock_rtas(s); + raw_spin_unlock_irqrestore(&rtas_lock, flags); } static void call_rtas_display_status_delay(char c) @@ -539,7 +517,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; int i; - unsigned long s; + unsigned long flags; struct rtas_args *args; char *buff_copy = NULL; int ret; @@ -562,8 +540,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) return -1; } - s = lock_rtas(); - + raw_spin_lock_irqsave(&rtas_lock, flags); /* We use the global rtas args buffer */ args = &rtas_args; @@ -581,7 +558,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) outputs[i] = be32_to_cpu(args->rets[i + 1]); ret = (nret > 0) ? 
be32_to_cpu(args->rets[0]) : 0; - unlock_rtas(s); + raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); @@ -1271,7 +1248,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); - flags = lock_rtas(); + raw_spin_lock_irqsave(&rtas_lock, flags); rtas_args = args; do_enter_rtas(__pa(&rtas_args)); @@ -1282,7 +1259,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) if (be32_to_cpu(args.rets[0]) == -1) errbuf = __fetch_rtas_last_error(buff_copy); - unlock_rtas(flags); + raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { if (errbuf) @@ -1404,19 +1381,18 @@ int __init early_init_dt_scan_rtas(unsigned long node, return 1; } -static arch_spinlock_t timebase_lock; +static DEFINE_RAW_SPINLOCK(timebase_lock); static u64 timebase = 0; void rtas_give_timebase(void) { unsigned long flags; - local_irq_save(flags); + raw_spin_lock_irqsave(&timebase_lock, flags); hard_irq_disable(); - arch_spin_lock(&timebase_lock); rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); timebase = get_tb(); - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); while (timebase) barrier(); @@ -1428,8 +1404,8 @@ void rtas_take_timebase(void) { while (!timebase) barrier(); - arch_spin_lock(&timebase_lock); + raw_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); } -- cgit v1.2.3 From bab537805a10bdbf55b31324ba4a9599e0651e5e Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Jan 2023 13:26:46 +0100 Subject: powerpc: Check !irq instead of irq == NO_IRQ and remove NO_IRQ NO_IRQ is a relic from the old days. It is not used anymore in core functions. By the way, function irq_of_parse_and_map() returns value 0 on error. In some drivers, NO_IRQ is erroneously used to check the return of irq_of_parse_and_map(). It is not a real bug today because the only architectures using the drivers being fixed by this patch define NO_IRQ as 0, but there are architectures which define NO_IRQ as -1. If one day those architectures start using the non fixed drivers, there will be a problem. Long time ago Linus advocated for not using NO_IRQ, see https://lore.kernel.org/all/Pine.LNX.4.64.0511211150040.13959@g5.osdl.org He re-iterated the same view recently in https://lore.kernel.org/all/CAHk-=wg2Pkb9kbfbstbB91AJA2SF6cySbsgHG-iQMq56j3VTcA@mail.gmail.com So test !irq instead of tesing irq == NO_IRQ. All other usage of NO_IRQ for powerpc were removed in previous cycles so the time has come to remove NO_IRQ completely for powerpc. 
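The pattern being fixed, in short (simplified from the fsp2.c hunk below):

    irq = irq_of_parse_and_map(np, 0);
    if (!irq) {                     /* was: if (irq == NO_IRQ) */
            /* handle missing interrupt */
    }

Checking !irq is correct on every architecture, because irq_of_parse_and_map() returns 0 on failure, whereas NO_IRQ may be defined as -1 elsewhere.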
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4b8d4f96140af01dec3a3330924dda8b2451c316.1674476798.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/irq.h | 3 --- arch/powerpc/platforms/44x/fsp2.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 5c1516a5ba8f..deadd2149426 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -16,9 +16,6 @@ extern atomic_t ppc_n_lost_interrupts; -/* This number is used when no interrupt has been assigned */ -#define NO_IRQ (0) - /* Total number of virq in the platform */ #define NR_IRQS CONFIG_NR_IRQS diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index e2e4f6d8150d..56d91dbef577 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -205,7 +205,7 @@ static void __init node_irq_request(const char *compat, irq_handler_t errirq_han for_each_compatible_node(np, NULL, compat) { irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (!irq) { pr_err("device tree node %pOFn is missing a interrupt", np); of_node_put(np); -- cgit v1.2.3 From 5746ca131e2496ccd5bb4d7a0244d6c38070cbf5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 21 Jan 2023 20:26:18 +1000 Subject: powerpc/64: Don't recurse irq replay Interrupt handlers called by soft-pending irq replay code can run softirqs, softirq replay enables and disables local irqs, which allows interrupts to come in including soft-masked interrupts, and it can cause pending irqs to be replayed again. That makes the soft irq replay state machine and possible races more complicated and fragile than it needs to be. Use irq_enter/irq_exit around irq replay to prevent softirqs running while interrupts are being replayed. Softirqs will now be run at the irq_exit() call after all the irq replaying is done. This prevents irqs being replayed while irqs are being replayed, and should hopefully make things simpler and easier to think about and debug. A new PACA_IRQ_REPLAYING is added to prevent asynchronous interrupt handlers hard-enabling EE while pending irqs are being replayed, because that causes new pending irqs to arrive which is also a complexity. This means pending irqs won't be profiled quite so well because perf irqs can't be taken. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230121102618.2824429-1-npiggin@gmail.com --- arch/powerpc/include/asm/hw_irq.h | 6 ++- arch/powerpc/kernel/irq_64.c | 101 +++++++++++++++++++++++++------------- 2 files changed, 70 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 77fa88c2aed0..24b7d5facb85 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -36,15 +36,17 @@ #define PACA_IRQ_DEC 0x08 /* Or FIT */ #define PACA_IRQ_HMI 0x10 #define PACA_IRQ_PMI 0x20 +#define PACA_IRQ_REPLAYING 0x40 /* * Some soft-masked interrupts must be hard masked until they are replayed * (e.g., because the soft-masked handler does not clear the exception). + * Interrupt replay itself must remain hard masked too. 
*/ #ifdef CONFIG_PPC_BOOK3S -#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI) +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI|PACA_IRQ_REPLAYING) #else -#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) +#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_REPLAYING) #endif #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index eb2b380e52a0..9dc0ad3c533a 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -70,22 +70,19 @@ int distribute_irqs = 1; static inline void next_interrupt(struct pt_regs *regs) { - /* - * Softirq processing can enable/disable irqs, which will leave - * MSR[EE] enabled and the soft mask set to IRQS_DISABLED. Fix - * this up. - */ - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - else - irq_soft_mask_set(IRQS_ALL_DISABLED); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + } /* * We are responding to the next interrupt, so interrupt-off * latencies should be reset here. */ + lockdep_hardirq_exit(); trace_hardirqs_on(); trace_hardirqs_off(); + lockdep_hardirq_enter(); } static inline bool irq_happened_test_and_clear(u8 irq) @@ -97,22 +94,11 @@ static inline bool irq_happened_test_and_clear(u8 irq) return false; } -void replay_soft_interrupts(void) +static void __replay_soft_interrupts(void) { struct pt_regs regs; /* - * Be careful here, calling these interrupt handlers can cause - * softirqs to be raised, which they may run when calling irq_exit, - * which will cause local_irq_enable() to be run, which can then - * recurse into this function. Don't keep any state across - * interrupt handler calls which may change underneath us. - * - * Softirqs can not be disabled over replay to stop this recursion - * because interrupts taken in idle code may require RCU softirq - * to run in the irq RCU tracking context. This is a hard problem - * to fix without changes to the softirq or idle layer. - * * We use local_paca rather than get_paca() to avoid all the * debug_smp_processor_id() business in this low level function. */ @@ -120,13 +106,20 @@ void replay_soft_interrupts(void) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { WARN_ON_ONCE(mfmsr() & MSR_EE); WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING); } + /* + * PACA_IRQ_REPLAYING prevents interrupt handlers from enabling + * MSR[EE] to get PMIs, which can result in more IRQs becoming + * pending. + */ + local_paca->irq_happened |= PACA_IRQ_REPLAYING; + ppc_save_regs(®s); regs.softe = IRQS_ENABLED; regs.msr |= MSR_EE; -again: /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -175,13 +168,14 @@ again: next_interrupt(®s); } - /* - * Softirq processing can enable and disable interrupts, which can - * result in new irqs becoming pending. Must keep looping until we - * have cleared out all pending interrupts. 
- */ - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) - goto again; + local_paca->irq_happened &= ~PACA_IRQ_REPLAYING; +} + +void replay_soft_interrupts(void) +{ + irq_enter(); /* See comment in arch_local_irq_restore */ + __replay_soft_interrupts(); + irq_exit(); } #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) @@ -200,13 +194,13 @@ static inline void replay_soft_interrupts_irqrestore(void) if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(AMR_KUAP_BLOCKED); - replay_soft_interrupts(); + __replay_soft_interrupts(); if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(kuap_state); } #else -#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#define replay_soft_interrupts_irqrestore() __replay_soft_interrupts() #endif notrace void arch_local_irq_restore(unsigned long mask) @@ -219,9 +213,13 @@ notrace void arch_local_irq_restore(unsigned long mask) return; } - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(in_nmi() || in_hardirq()); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON_ONCE(in_nmi()); + WARN_ON_ONCE(in_hardirq()); + WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING); + } +again: /* * After the stb, interrupts are unmasked and there are no interrupts * pending replay. The restart sequence makes this atomic with @@ -248,6 +246,12 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + /* + * If we came here from the replay below, we might have a preempt + * pending (due to preempt_enable_no_resched()). Have to check now. + */ + preempt_check_resched(); + return; happened: @@ -261,6 +265,7 @@ happened: irq_soft_mask_set(IRQS_ENABLED); local_paca->irq_happened = 0; __hard_irq_enable(); + preempt_check_resched(); return; } @@ -296,12 +301,38 @@ happened: irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); + /* + * Now enter interrupt context. The interrupt handlers themselves + * also call irq_enter/exit (which is okay, they can nest). But call + * it here now to hold off softirqs until the below irq_exit(). If + * we allowed replayed handlers to run softirqs, that enables irqs, + * which must replay interrupts, which recurses in here and makes + * things more complicated. The recursion is limited to 2, and it can + * be made to work, but it's complicated. + * + * local_bh_disable can not be used here because interrupts taken in + * idle are not in the right context (RCU, tick, etc) to run softirqs + * so irq_enter must be called. + */ + irq_enter(); + replay_soft_interrupts_irqrestore(); + irq_exit(); + + if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) { + /* + * The softirq processing in irq_exit() may enable interrupts + * temporarily, which can result in MSR[EE] being enabled and + * more irqs becoming pending. Go around again if that happens. 
+ */ + trace_hardirqs_on(); + preempt_enable_no_resched(); + goto again; + } + trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS); local_paca->irq_happened = 0; __hard_irq_enable(); preempt_enable(); -- cgit v1.2.3 From 34557b7504778e32b166cf46c6f774086124e845 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Sat, 28 Jan 2023 14:34:58 +0100 Subject: powerpc/pci: Enable PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes sense to enable CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT by default (when possible by dependencies) to take advantages of all 256 PCI buses on each PCI domain, like it is already on all other kernel architectures. Fixes: 566356813082 ("powerpc/pci: Add config option for using all 256 PCI buses") Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230128133459.32123-1-pali@kernel.org --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b8c4ac56bddc..fad25aa602c8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -392,6 +392,7 @@ config PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT depends on PPC32 depends on !PPC_PMAC && !PPC_CHRP bool "Assign PCI bus numbers from zero individually for each PCI domain" + default y help By default on PPC32 were PCI bus numbers unique across all PCI domains. So system could have only 256 PCI buses independently of available -- cgit v1.2.3 From 5d2eb73aa0fd57844938a22bddc59f7bc8183924 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Mon, 6 Feb 2023 22:39:02 +1100 Subject: powerpc/pci: Add option for using pci_to_OF_bus_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "pci-OF-bus-map" property was declared deprecated in 2006 [1] and to the best of everyone's knowledge is not used by anything anymore [2]. The creation of the property was disabled on powermac (arch/powerpc) in 2005 by commit 35499c0195e4 ("powerpc: Merge in 64-bit powermac support."). But it is still created by default on CHRP. On powermac the actual map (pci_to_OF_bus_map) is still used by default, even though the device tree property is not created. Add an option to enable/disable use of the pci_to_OF_bus_map, and creation of the property (on CHRP). Disabling the option allows enabling CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT which allows "normal" bus numbering and more than 256 buses, like 64-bit and other architectures. Mark the new option as default n, the intention is that the option and the code will be removed in a future release. 
[1]: https://lore.kernel.org/linuxppc-dev/1148016268.13249.14.camel@localhost.localdomain/ [2]: https://lore.kernel.org/all/575f239205e8635add81c9f902b7d9db7beb83ea.camel@kernel.crashing.org/ Signed-off-by: Pali Rohár [mpe: Reword commit & help text, shrink option name, rework to fix build errors] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206113902.1857123-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 13 ++++++++++++- arch/powerpc/include/asm/pci-bridge.h | 4 +++- arch/powerpc/kernel/pci_32.c | 17 ++++++++++++----- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fad25aa602c8..7dcb011cebf2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -388,9 +388,20 @@ config PPC_DCR depends on PPC_DCR_NATIVE || PPC_DCR_MMIO default y +config PPC_PCI_OF_BUS_MAP + bool "Use pci_to_OF_bus_map (deprecated)" + depends on PPC32 + depends on PPC_PMAC || PPC_CHRP + help + This option uses pci_to_OF_bus_map to map OF nodes to PCI devices, which + restricts the system to only having 256 PCI buses. On CHRP it also causes + the "pci-OF-bus-map" property to be created in the device tree. + + If unsure, say "N". + config PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT depends on PPC32 - depends on !PPC_PMAC && !PPC_CHRP + depends on !PPC_PCI_OF_BUS_MAP bool "Assign PCI bus numbers from zero individually for each PCI domain" default y help diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index e18c95f4e1d4..71c1d26f2400 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -176,8 +176,10 @@ extern int pci_device_from_OF_node(struct device_node *node, #endif #ifndef CONFIG_PPC64 -#ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP extern void pci_create_OF_bus_map(void); +#else +static inline void pci_create_OF_bus_map(void) {} #endif #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 855b59892c5c..ce0c8623e563 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -62,7 +62,7 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP static u8* pci_to_OF_bus_map; static int pci_bus_count; @@ -152,6 +152,7 @@ pcibios_make_OF_bus_map(void) } #endif } +#endif // CONFIG_PPC_PCI_OF_BUS_MAP #ifdef CONFIG_PPC_PMAC @@ -160,7 +161,9 @@ pcibios_make_OF_bus_map(void) */ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) { +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP struct pci_dev *dev = NULL; +#endif const __be32 *reg; int size; @@ -175,6 +178,9 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) *bus = (be32_to_cpup(®[0]) >> 16) & 0xff; *devfn = (be32_to_cpup(®[0]) >> 8) & 0xff; +#ifndef CONFIG_PPC_PCI_OF_BUS_MAP + return 0; +#else /* Ok, here we need some tweak. If we have already renumbered * all busses, we can't rely on the OF bus number any more. 
* the pci_to_OF_bus_map is not enough as several PCI busses @@ -192,11 +198,12 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) } return -ENODEV; +#endif // CONFIG_PPC_PCI_OF_BUS_MAP } EXPORT_SYMBOL(pci_device_from_OF_node); #endif -#ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree */ @@ -221,9 +228,7 @@ pci_create_OF_bus_map(void) of_node_put(dn); } } -#endif - -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) */ +#endif // CONFIG_PPC_PCI_OF_BUS_MAP void pcibios_setup_phb_io_space(struct pci_controller *hose) { @@ -273,6 +278,7 @@ static int __init pcibios_init(void) } #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP pci_bus_count = next_busno; /* OpenFirmware based machines need a map of OF bus @@ -281,6 +287,7 @@ static int __init pcibios_init(void) */ if (pci_assign_all_buses) pcibios_make_OF_bus_map(); +#endif #endif /* Call common code to handle resource allocation */ -- cgit v1.2.3 From 4b10306e98456aed03cad75ce467e8b1efdccca0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 2 Feb 2023 12:01:04 +0100 Subject: powerpc: Disable CPU unknown by CLANG when CC_IS_CLANG CLANG only knows the following CPUs: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, futur Disable other ones when CC_IS_CLANG. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e62892e32c14a7a5738c597e39e0082cb0abf21c.1675335659.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 31cea2eeb59e..3219e974ec95 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -143,6 +143,7 @@ config POWERPC_CPU config CELL_CPU bool "Cell Broadband Engine" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + depends on !CC_IS_CLANG select PPC_64S_HASH_MMU config PPC_970_CPU @@ -187,11 +188,13 @@ config E5500_CPU config E6500_CPU bool "Freescale e6500" depends on PPC64 && PPC_E500 + depends on !CC_IS_CLANG select PPC_HAS_LBARX_LHARX config 405_CPU bool "40x family" depends on 40x + depends on !CC_IS_CLANG config 440_CPU bool "440 (44x family)" @@ -200,22 +203,27 @@ config 440_CPU config 464_CPU bool "464 (44x family)" depends on 44x + depends on !CC_IS_CLANG config 476_CPU bool "476 (47x family)" depends on PPC_47x + depends on !CC_IS_CLANG config 860_CPU bool "8xx family" depends on PPC_8xx + depends on !CC_IS_CLANG config E300C2_CPU bool "e300c2 (832x)" depends on PPC_BOOK3S_32 + depends on !CC_IS_CLANG config E300C3_CPU bool "e300c3 (831x)" depends on PPC_BOOK3S_32 + depends on !CC_IS_CLANG config G4_CPU bool "G4 (74xx)" -- cgit v1.2.3 From 45abf5d94b9bd0eebca5c7272788e2d16c8b5b43 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Feb 2023 21:17:16 +1000 Subject: powerpc/64s/radix: Remove need_flush_all test from radix__tlb_flush need_flush_all is only set by arch code to instruct generic tlb_flush to flush all. It is never set by powerpc, so it can be removed. 
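Context sketch (an assumption about the surrounding flow, not part of the patch): radix__tlb_flush() is powerpc's tlb_flush() hook, reached from the generic mmu_gather teardown roughly as

    tlb_finish_mmu(tlb)
      -> tlb_flush_mmu(tlb)
        -> tlb_flush(tlb)           /* radix__tlb_flush() on radix MMUs */

and since nothing on powerpc sets tlb->need_flush_all, the full-flush decision reduces to tlb->fullmm alone, as in the hunk below.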
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203111718.1149852-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 4e29b619578c..ac427fed681c 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -1302,7 +1302,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) * that flushes the process table entry cache upon process teardown. * See the comment for radix in arch_exit_mmap(). */ - if (tlb->fullmm || tlb->need_flush_all) { + if (tlb->fullmm) { __flush_all_mm(mm, true); } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { if (!tlb->freed_tables) -- cgit v1.2.3 From d01dc25e47af9d30185ca12bb9e221d6af915d9f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Feb 2023 21:17:17 +1000 Subject: powerpc/64s/radix: mm->context.id should always be valid The MMU_NO_CONTEXT checks are an unnecessary complication. Make these warn to prepare to remove them in future. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203111718.1149852-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 39 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index ac427fed681c..d6d072171797 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -700,12 +700,13 @@ static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, */ void radix__local_flush_tlb_mm(struct mm_struct *mm) { - unsigned long pid; + unsigned long pid = mm->context.id; + + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) + return; preempt_disable(); - pid = mm->context.id; - if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid, RIC_FLUSH_TLB); + _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); @@ -713,12 +714,13 @@ EXPORT_SYMBOL(radix__local_flush_tlb_mm); #ifndef CONFIG_SMP void radix__local_flush_all_mm(struct mm_struct *mm) { - unsigned long pid; + unsigned long pid = mm->context.id; + + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) + return; preempt_disable(); - pid = mm->context.id; - if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_all_mm); @@ -732,12 +734,13 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { - unsigned long pid; + unsigned long pid = mm->context.id; + + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) + return; preempt_disable(); - pid = mm->context.id; - if (pid != MMU_NO_CONTEXT) - _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); + _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); preempt_enable(); } @@ -945,7 +948,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm) enum tlb_flush_type type; pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; preempt_disable(); @@ -985,7 +988,7 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) enum tlb_flush_type type; pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; preempt_disable(); @@ -1024,7 +1027,7 @@ void radix__flush_tlb_page_psize(struct 
mm_struct *mm, unsigned long vmaddr, enum tlb_flush_type type; pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; preempt_disable(); @@ -1130,7 +1133,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, enum tlb_flush_type type; pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; preempt_disable(); @@ -1330,7 +1333,7 @@ static void __radix__flush_tlb_range_psize(struct mm_struct *mm, enum tlb_flush_type type; pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; fullmm = (end == TLB_FLUSH_ALL); @@ -1406,7 +1409,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) enum tlb_flush_type type; pid = mm->context.id; - if (unlikely(pid == MMU_NO_CONTEXT)) + if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; /* 4k page size, just blow the world */ -- cgit v1.2.3 From dcfecb989afdf9101ee42e2adf04756a2ea4819d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Feb 2023 21:17:18 +1000 Subject: powerpc/64s/radix: Remove TLB_FLUSH_ALL test from range flushes This looks like it came across from x86, but x86 uses TLB_FLUSH_ALL as a parameter to internal functions. Powerpc never sets it anywhere. Remove the associated logic and leave a warning for now. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203111718.1149852-4-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index d6d072171797..50b70105364f 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -1107,6 +1107,9 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) } EXPORT_SYMBOL(radix__flush_tlb_kernel_range); +/* + * Doesn't appear to be used anywhere. Remove. 
+ */ #define TLB_FLUSH_ALL -1UL /* @@ -1128,7 +1131,6 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool fullmm = (end == TLB_FLUSH_ALL); bool flush_pid, flush_pwc = false; enum tlb_flush_type type; @@ -1136,15 +1138,15 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; + WARN_ON_ONCE(end == TLB_FLUSH_ALL); + preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - type = flush_type_needed(mm, fullmm); + type = flush_type_needed(mm, false); if (type == FLUSH_TYPE_NONE) goto out; - if (fullmm) - flush_pid = true; - else if (type == FLUSH_TYPE_GLOBAL) + if (type == FLUSH_TYPE_GLOBAL) flush_pid = nr_pages > tlb_single_page_flush_ceiling; else flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; @@ -1328,7 +1330,6 @@ static void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool fullmm = (end == TLB_FLUSH_ALL); bool flush_pid; enum tlb_flush_type type; @@ -1336,17 +1337,15 @@ static void __radix__flush_tlb_range_psize(struct mm_struct *mm, if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) return; - fullmm = (end == TLB_FLUSH_ALL); + WARN_ON_ONCE(end == TLB_FLUSH_ALL); preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - type = flush_type_needed(mm, fullmm); + type = flush_type_needed(mm, false); if (type == FLUSH_TYPE_NONE) goto out; - if (fullmm) - flush_pid = true; - else if (type == FLUSH_TYPE_GLOBAL) + if (type == FLUSH_TYPE_GLOBAL) flush_pid = nr_pages > tlb_single_page_flush_ceiling; else flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; -- cgit v1.2.3 From b505063910c134778202dfad9332dfcecb76bab3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Feb 2023 15:19:19 +0100 Subject: powerpc/iommu: fix memory leak with using debugfs_lookup() When calling debugfs_lookup() the result must have dput() called on it, otherwise the memory will leak over time. To make things simpler, just call debugfs_lookup_and_remove() instead which handles all of the logic at once. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230202141919.2298821-1-gregkh@linuxfoundation.org --- arch/powerpc/kernel/iommu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index caebe1431596..ee95937bdaf1 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -67,11 +67,9 @@ static void iommu_debugfs_add(struct iommu_table *tbl) static void iommu_debugfs_del(struct iommu_table *tbl) { char name[10]; - struct dentry *liobn_entry; sprintf(name, "%08lx", tbl->it_index); - liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - debugfs_remove(liobn_entry); + debugfs_lookup_and_remove(name, iommu_debugfs_dir); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} -- cgit v1.2.3 From a974f0c131891027fe8490e654a220151b4caa82 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 3 Feb 2023 11:39:43 +1100 Subject: selftests/powerpc: Add generic read/write file util File read/write is reimplemented in about 5 different ways in the various PowerPC selftests. This indicates it should be a common util. 
Add a common read_file / write_file implementation and convert users to it where (easily) possible. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203003947.38033-2-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/dscr/dscr.h | 33 ++---- .../selftests/powerpc/dscr/dscr_sysfs_test.c | 21 +--- tools/testing/selftests/powerpc/include/utils.h | 2 + .../testing/selftests/powerpc/nx-gzip/gzfht_test.c | 48 ++++----- tools/testing/selftests/powerpc/pmu/lib.c | 31 ++---- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 28 ++--- tools/testing/selftests/powerpc/utils.c | 117 +++++++++++++-------- 7 files changed, 120 insertions(+), 160 deletions(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h index b703714e7d98..aaa2b0d89f7e 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr.h +++ b/tools/testing/selftests/powerpc/dscr/dscr.h @@ -64,48 +64,31 @@ inline void set_dscr_usr(unsigned long val) /* Default DSCR access */ unsigned long get_default_dscr(void) { - int fd = -1, ret; - char buf[16]; + int err; + char buf[16] = {0}; unsigned long val; - if (fd == -1) { - fd = open(DSCR_DEFAULT, O_RDONLY); - if (fd == -1) { - perror("open() failed"); - exit(1); - } - } - memset(buf, 0, sizeof(buf)); - lseek(fd, 0, SEEK_SET); - ret = read(fd, buf, sizeof(buf)); - if (ret == -1) { + err = read_file(DSCR_DEFAULT, buf, sizeof(buf) - 1, NULL); + if (err) { perror("read() failed"); exit(1); } sscanf(buf, "%lx", &val); - close(fd); return val; } void set_default_dscr(unsigned long val) { - int fd = -1, ret; + int err; char buf[16]; - if (fd == -1) { - fd = open(DSCR_DEFAULT, O_RDWR); - if (fd == -1) { - perror("open() failed"); - exit(1); - } - } sprintf(buf, "%lx\n", val); - ret = write(fd, buf, strlen(buf)); - if (ret == -1) { + + err = write_file(DSCR_DEFAULT, buf, strlen(buf)); + if (err) { perror("write() failed"); exit(1); } - close(fd); } double uniform_deviate(int seed) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index f20d1c166d1e..c350f193830a 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -12,24 +12,13 @@ static int check_cpu_dscr_default(char *file, unsigned long val) { - char buf[10]; - int fd, rc; + char buf[10] = {0}; + int rc; - fd = open(file, O_RDWR); - if (fd == -1) { - perror("open() failed"); - return 1; - } - - rc = read(fd, buf, sizeof(buf)); - if (rc == -1) { - perror("read() failed"); - close(fd); - return 1; - } - close(fd); + rc = read_file(file, buf, sizeof(buf) - 1, NULL); + if (rc) + return rc; - buf[rc] = '\0'; if (strtol(buf, NULL, 16) != val) { printf("DSCR match failed: %ld (system) %ld (cpu)\n", val, strtol(buf, NULL, 16)); diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index e222a5858450..70885e5814a8 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -33,6 +33,8 @@ void *get_auxv_entry(int type); int pick_online_cpu(void); +int read_file(const char *path, char *buf, size_t count, size_t *len); +int write_file(const char *path, const char *buf, size_t count); int read_debugfs_file(char *debugfs_file, int *result); int write_debugfs_file(char *debugfs_file, int result); int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); diff --git 
a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index 095195a25687..fbc3d265155b 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -146,49 +146,37 @@ int gzip_header_blank(char *buf) /* Caller must free the allocated buffer return nonzero on error. */ int read_alloc_input_file(char *fname, char **buf, size_t *bufsize) { + int err; struct stat statbuf; - FILE *fp; char *p; size_t num_bytes; if (stat(fname, &statbuf)) { perror(fname); - return(-1); - } - fp = fopen(fname, "r"); - if (fp == NULL) { - perror(fname); - return(-1); + return -1; } + assert(NULL != (p = (char *) malloc(statbuf.st_size))); - num_bytes = fread(p, 1, statbuf.st_size, fp); - if (ferror(fp) || (num_bytes != statbuf.st_size)) { + + err = read_file(fname, p, statbuf.st_size, &num_bytes); + if (err) { perror(fname); - return(-1); + goto fail; } + + if (num_bytes != statbuf.st_size) { + fprintf(stderr, "Actual bytes != expected bytes\n"); + err = -1; + goto fail; + } + *buf = p; *bufsize = num_bytes; return 0; -} -/* Returns nonzero on error */ -int write_output_file(char *fname, char *buf, size_t bufsize) -{ - FILE *fp; - size_t num_bytes; - - fp = fopen(fname, "w"); - if (fp == NULL) { - perror(fname); - return(-1); - } - num_bytes = fwrite(buf, 1, bufsize, fp); - if (ferror(fp) || (num_bytes != bufsize)) { - perror(fname); - return(-1); - } - fclose(fp); - return 0; +fail: + free(p); + return err; } /* @@ -399,7 +387,7 @@ int compress_file(int argc, char **argv, void *handle) assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT))); strcpy(outname, argv[1]); strcat(outname, FEXT); - if (write_output_file(outname, outbuf, dsttotlen)) { + if (write_file(outname, outbuf, dsttotlen)) { fprintf(stderr, "write error: %s\n", outname); exit(-1); } diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index 88690b97b7b9..960915304a65 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -190,38 +190,23 @@ int parse_proc_maps(void) bool require_paranoia_below(int level) { + int err; long current; - char *end, buf[16]; - FILE *f; - bool rc; - - rc = false; - - f = fopen(PARANOID_PATH, "r"); - if (!f) { - perror("fopen"); - goto out; - } + char *end; + char buf[16] = {0}; - if (!fgets(buf, sizeof(buf), f)) { + err = read_file(PARANOID_PATH, buf, sizeof(buf) - 1, NULL); + if (err) { printf("Couldn't read " PARANOID_PATH "?\n"); - goto out_close; + return false; } current = strtol(buf, &end, 10); if (end == buf) { printf("Couldn't parse " PARANOID_PATH "?\n"); - goto out_close; + return false; } - if (current >= level) - goto out_close; - - rc = true; -out_close: - fclose(f); -out: - return rc; + return current < level; } - diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index 4e8d0ce1ff58..f6f8596ce8e1 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -348,15 +348,11 @@ static int parent(struct shared_info *info, pid_t pid) static int write_core_pattern(const char *core_pattern) { - size_t len = strlen(core_pattern), ret; - FILE *f; + int err; - f = fopen(core_pattern_file, "w"); - SKIP_IF_MSG(!f, "Try with root privileges"); - - ret = fwrite(core_pattern, 1, len, f); - fclose(f); - if (ret != len) { + err = write_file(core_pattern_file, core_pattern, 
strlen(core_pattern)); + if (err) { + SKIP_IF_MSG(err == -EPERM, "Try with root privileges"); perror("Error writing to core_pattern file"); return TEST_FAIL; } @@ -366,8 +362,8 @@ static int write_core_pattern(const char *core_pattern) static int setup_core_pattern(char **core_pattern_, bool *changed_) { - FILE *f; char *core_pattern; + size_t len; int ret; core_pattern = malloc(PATH_MAX); @@ -376,22 +372,14 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_) return TEST_FAIL; } - f = fopen(core_pattern_file, "r"); - if (!f) { - perror("Error opening core_pattern file"); - ret = TEST_FAIL; - goto out; - } - - ret = fread(core_pattern, 1, PATH_MAX - 1, f); - fclose(f); - if (!ret) { + ret = read_file(core_pattern_file, core_pattern, PATH_MAX - 1, &len); + if (ret) { perror("Error reading core_pattern file"); ret = TEST_FAIL; goto out; } - core_pattern[ret] = '\0'; + core_pattern[len] = '\0'; /* Check whether we can predict the name of the core file. */ if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p")) diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 1f36ee1a909a..22ba13425b2c 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -26,34 +26,83 @@ static char auxv[4096]; -int read_auxv(char *buf, ssize_t buf_size) +int read_file(const char *path, char *buf, size_t count, size_t *len) { - ssize_t num; - int rc, fd; + ssize_t rc; + int fd; + int err; + char eof; - fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - perror("open"); + fd = open(path, O_RDONLY); + if (fd < 0) return -errno; + + rc = read(fd, buf, count); + if (rc < 0) { + err = -errno; + goto out; + } + + if (len) + *len = rc; + + /* Overflow if there are still more bytes after filling the buffer */ + if (rc == count) { + rc = read(fd, &eof, 1); + if (rc != 0) { + err = -EOVERFLOW; + goto out; + } } - num = read(fd, buf, buf_size); - if (num < 0) { - perror("read"); - rc = -EIO; + err = 0; + +out: + close(fd); + errno = -err; + return err; +} + +int write_file(const char *path, const char *buf, size_t count) +{ + int fd; + int err; + ssize_t rc; + + fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + return -errno; + + rc = write(fd, buf, count); + if (rc < 0) { + err = -errno; goto out; } - if (num > buf_size) { - printf("overflowed auxv buffer\n"); - rc = -EOVERFLOW; + if (rc != count) { + err = -EOVERFLOW; goto out; } - rc = 0; + err = 0; + out: close(fd); - return rc; + errno = -err; + return err; +} + +int read_auxv(char *buf, ssize_t buf_size) +{ + int err; + + err = read_file("/proc/self/auxv", buf, buf_size, NULL); + if (err) { + perror("Error reading /proc/self/auxv"); + return err; + } + + return 0; } void *find_auxv_entry(int type, char *auxv) @@ -142,65 +191,41 @@ bool is_ppc64le(void) int read_sysfs_file(char *fpath, char *result, size_t result_size) { char path[PATH_MAX] = "/sys/"; - int rc = -1, fd; strncat(path, fpath, PATH_MAX - strlen(path) - 1); - if ((fd = open(path, O_RDONLY)) < 0) - return rc; - - rc = read(fd, result, result_size); - - close(fd); - - if (rc < 0) - return rc; - - return 0; + return read_file(path, result, result_size, NULL); } int read_debugfs_file(char *debugfs_file, int *result) { - int rc = -1, fd; + int err; char path[PATH_MAX]; - char value[16]; + char value[16] = {0}; strcpy(path, "/sys/kernel/debug/"); strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1); - if ((fd = open(path, O_RDONLY)) < 0) - return rc; - - if ((rc = 
read(fd, value, sizeof(value))) < 0) - return rc; + err = read_file(path, value, sizeof(value) - 1, NULL); + if (err) + return err; - value[15] = 0; *result = atoi(value); - close(fd); return 0; } int write_debugfs_file(char *debugfs_file, int result) { - int rc = -1, fd; char path[PATH_MAX]; char value[16]; strcpy(path, "/sys/kernel/debug/"); strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1); - if ((fd = open(path, O_WRONLY)) < 0) - return rc; - snprintf(value, 16, "%d", result); - if ((rc = write(fd, value, strlen(value))) < 0) - return rc; - - close(fd); - - return 0; + return write_file(path, value, strlen(value)); } static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, -- cgit v1.2.3 From 121d340be9a17ed89d523c56203908c01e09a306 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 3 Feb 2023 11:39:44 +1100 Subject: selftests/powerpc: Add read/write debugfs file, int Debugfs files are not always integers, so make *_file return/write a byte buffer, and *_int deal with int values specifically. This increases consistency with the other file read/write helpers. Signed-off-by: Benjamin Gray Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203003947.38033-3-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/include/utils.h | 6 ++-- .../selftests/powerpc/security/entry_flush.c | 12 ++++---- .../testing/selftests/powerpc/security/rfi_flush.c | 12 ++++---- .../selftests/powerpc/security/uaccess_flush.c | 18 ++++++------ tools/testing/selftests/powerpc/utils.c | 34 ++++++++++++++-------- 5 files changed, 47 insertions(+), 35 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 70885e5814a8..de5e3790f397 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -35,8 +35,10 @@ int pick_online_cpu(void); int read_file(const char *path, char *buf, size_t count, size_t *len); int write_file(const char *path, const char *buf, size_t count); -int read_debugfs_file(char *debugfs_file, int *result); -int write_debugfs_file(char *debugfs_file, int result); +int read_debugfs_file(const char *debugfs_file, char *buf, size_t count); +int write_debugfs_file(const char *debugfs_file, const char *buf, size_t count); +int read_debugfs_int(const char *debugfs_file, int *result); +int write_debugfs_int(const char *debugfs_file, int result); int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); int perf_event_open_counter(unsigned int type, unsigned long config, int group_fd); diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 68ce377b205e..e01c573deadd 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -34,18 +34,18 @@ int entry_flush_test(void) // The PMU event we use only works on Power7 or later SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + if (read_debugfs_int("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + if (read_debugfs_int("powerpc/entry_flush", &entry_flush_orig) < 0) { perror("Unable to read powerpc/entry_flush debugfs file"); SKIP_IF(1); } if (rfi_flush_orig != 0) { - if 
(write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + if (write_debugfs_int("powerpc/rfi_flush", 0) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); FAIL_IF(1); } @@ -105,7 +105,7 @@ again: if (entry_flush == entry_flush_orig) { entry_flush = !entry_flush_orig; - if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) { + if (write_debugfs_int("powerpc/entry_flush", entry_flush) < 0) { perror("error writing to powerpc/entry_flush debugfs file"); return 1; } @@ -120,12 +120,12 @@ again: set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + if (write_debugfs_int("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } - if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + if (write_debugfs_int("powerpc/entry_flush", entry_flush_orig) < 0) { perror("unable to restore original value of powerpc/entry_flush debugfs file"); return 1; } diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index f73484a6470f..6bedc86443a6 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -34,18 +34,18 @@ int rfi_flush_test(void) // The PMU event we use only works on Power7 or later SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + if (read_debugfs_int("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + if (read_debugfs_int("powerpc/entry_flush", &entry_flush_orig) < 0) { have_entry_flush = 0; } else { have_entry_flush = 1; if (entry_flush_orig != 0) { - if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + if (write_debugfs_int("powerpc/entry_flush", 0) < 0) { perror("error writing to powerpc/entry_flush debugfs file"); return 1; } @@ -105,7 +105,7 @@ again: if (rfi_flush == rfi_flush_orig) { rfi_flush = !rfi_flush_orig; - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) { + if (write_debugfs_int("powerpc/rfi_flush", rfi_flush) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); return 1; } @@ -120,13 +120,13 @@ again: set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + if (write_debugfs_int("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } if (have_entry_flush) { - if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + if (write_debugfs_int("powerpc/entry_flush", entry_flush_orig) < 0) { perror("unable to restore original value of powerpc/entry_flush " "debugfs file"); return 1; diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c index cf80f960e38a..fcf23ea9b183 100644 --- a/tools/testing/selftests/powerpc/security/uaccess_flush.c +++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c @@ -36,30 +36,30 @@ int uaccess_flush_test(void) // The PMU event we use only works on Power7 or later SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + if (read_debugfs_int("powerpc/rfi_flush", &rfi_flush_orig) < 0) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); } - if 
(read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + if (read_debugfs_int("powerpc/entry_flush", &entry_flush_orig) < 0) { perror("Unable to read powerpc/entry_flush debugfs file"); SKIP_IF(1); } - if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) { + if (read_debugfs_int("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) { perror("Unable to read powerpc/entry_flush debugfs file"); SKIP_IF(1); } if (rfi_flush_orig != 0) { - if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + if (write_debugfs_int("powerpc/rfi_flush", 0) < 0) { perror("error writing to powerpc/rfi_flush debugfs file"); FAIL_IF(1); } } if (entry_flush_orig != 0) { - if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + if (write_debugfs_int("powerpc/entry_flush", 0) < 0) { perror("error writing to powerpc/entry_flush debugfs file"); FAIL_IF(1); } @@ -119,7 +119,7 @@ again: if (uaccess_flush == uaccess_flush_orig) { uaccess_flush = !uaccess_flush_orig; - if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) { + if (write_debugfs_int("powerpc/uaccess_flush", uaccess_flush) < 0) { perror("error writing to powerpc/uaccess_flush debugfs file"); return 1; } @@ -134,17 +134,17 @@ again: set_dscr(0); - if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + if (write_debugfs_int("powerpc/rfi_flush", rfi_flush_orig) < 0) { perror("unable to restore original value of powerpc/rfi_flush debugfs file"); return 1; } - if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + if (write_debugfs_int("powerpc/entry_flush", entry_flush_orig) < 0) { perror("unable to restore original value of powerpc/entry_flush debugfs file"); return 1; } - if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) { + if (write_debugfs_int("powerpc/uaccess_flush", uaccess_flush_orig) < 0) { perror("unable to restore original value of powerpc/uaccess_flush debugfs file"); return 1; } diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 22ba13425b2c..495299a79f50 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -105,6 +105,24 @@ int read_auxv(char *buf, ssize_t buf_size) return 0; } +int read_debugfs_file(const char *subpath, char *buf, size_t count) +{ + char path[PATH_MAX] = "/sys/kernel/debug/"; + + strncat(path, subpath, sizeof(path) - strlen(path) - 1); + + return read_file(path, buf, count, NULL); +} + +int write_debugfs_file(const char *subpath, const char *buf, size_t count) +{ + char path[PATH_MAX] = "/sys/kernel/debug/"; + + strncat(path, subpath, sizeof(path) - strlen(path) - 1); + + return write_file(path, buf, count); +} + void *find_auxv_entry(int type, char *auxv) { ElfW(auxv_t) *p; @@ -197,16 +215,12 @@ int read_sysfs_file(char *fpath, char *result, size_t result_size) return read_file(path, result, result_size, NULL); } -int read_debugfs_file(char *debugfs_file, int *result) +int read_debugfs_int(const char *debugfs_file, int *result) { int err; - char path[PATH_MAX]; char value[16] = {0}; - strcpy(path, "/sys/kernel/debug/"); - strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1); - - err = read_file(path, value, sizeof(value) - 1, NULL); + err = read_debugfs_file(debugfs_file, value, sizeof(value) - 1); if (err) return err; @@ -215,17 +229,13 @@ int read_debugfs_file(char *debugfs_file, int *result) return 0; } -int write_debugfs_file(char *debugfs_file, int result) +int write_debugfs_int(const char *debugfs_file, int result) { - char 
path[PATH_MAX]; char value[16]; - strcpy(path, "/sys/kernel/debug/"); - strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1); - snprintf(value, 16, "%d", result); - return write_file(path, value, strlen(value)); + return write_debugfs_file(debugfs_file, value, strlen(value)); } static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, -- cgit v1.2.3 From d1bc05b7bf02f8635fe6c445f67d78f85234cbb7 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 3 Feb 2023 11:39:45 +1100 Subject: selftests/powerpc: Parse long/unsigned long value safely Often a file is expected to hold an integral value. Existing functions will use a C stdlib function like atoi or strtol to parse the file. These operations are error prone, with complicated error conditions (atoi returns 0 if not a number, and is undefined behaviour if not in range. strtol returns 0 if not a number, and LONG_MIN/MAX if not in range + sets errno to ERANGE). Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203003947.38033-4-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/include/utils.h | 7 ++ tools/testing/selftests/powerpc/pmu/lib.c | 6 +- tools/testing/selftests/powerpc/utils.c | 126 +++++++++++++++++++++++- 3 files changed, 132 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index de5e3790f397..7c1fa385824c 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -33,6 +33,13 @@ void *get_auxv_entry(int type); int pick_online_cpu(void); +int parse_intmax(const char *buffer, size_t count, intmax_t *result, int base); +int parse_uintmax(const char *buffer, size_t count, uintmax_t *result, int base); +int parse_int(const char *buffer, size_t count, int *result, int base); +int parse_uint(const char *buffer, size_t count, unsigned int *result, int base); +int parse_long(const char *buffer, size_t count, long *result, int base); +int parse_ulong(const char *buffer, size_t count, unsigned long *result, int base); + int read_file(const char *path, char *buf, size_t count, size_t *len); int write_file(const char *path, const char *buf, size_t count); int read_debugfs_file(const char *debugfs_file, char *buf, size_t count); diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index 960915304a65..1cfc13a25aee 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -192,7 +192,6 @@ bool require_paranoia_below(int level) { int err; long current; - char *end; char buf[16] = {0}; err = read_file(PARANOID_PATH, buf, sizeof(buf) - 1, NULL); @@ -201,9 +200,8 @@ bool require_paranoia_below(int level) return false; } - current = strtol(buf, &end, 10); - - if (end == buf) { + err = parse_long(buf, sizeof(buf), &current, 10); + if (err) { printf("Couldn't parse " PARANOID_PATH "?\n"); return false; } diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 495299a79f50..ddfd871881bf 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -123,6 +125,126 @@ int write_debugfs_file(const char *subpath, const char *buf, size_t count) return write_file(path, buf, count); } +static int validate_int_parse(const char *buffer, size_t count, char *end) +{ + int err = 0; +
/* Require at least one digit */ + if (end == buffer) { + err = -EINVAL; + goto out; + } + + /* Require all remaining characters be whitespace-ish */ + for (; end < buffer + count; end++) { + if (*end == '\0') + break; + + if (*end != ' ' && *end != '\n') { + err = -EINVAL; + goto out; + } + } + +out: + errno = -err; + return err; +} + +static int parse_bounded_int(const char *buffer, size_t count, intmax_t *result, + int base, intmax_t min, intmax_t max) +{ + int err; + char *end; + + errno = 0; + *result = strtoimax(buffer, &end, base); + + if (errno) + return -errno; + + err = validate_int_parse(buffer, count, end); + if (err) + goto out; + + if (*result < min || *result > max) + err = -EOVERFLOW; + +out: + errno = -err; + return err; +} + +static int parse_bounded_uint(const char *buffer, size_t count, uintmax_t *result, + int base, uintmax_t max) +{ + int err = 0; + char *end; + + errno = 0; + *result = strtoumax(buffer, &end, base); + + if (errno) + return -errno; + + err = validate_int_parse(buffer, count, end); + if (err) + goto out; + + if (*result > max) + err = -EOVERFLOW; + +out: + errno = -err; + return err; +} + +int parse_intmax(const char *buffer, size_t count, intmax_t *result, int base) +{ + return parse_bounded_int(buffer, count, result, base, INTMAX_MIN, INTMAX_MAX); +} + +int parse_uintmax(const char *buffer, size_t count, uintmax_t *result, int base) +{ + return parse_bounded_uint(buffer, count, result, base, UINTMAX_MAX); +} + +int parse_int(const char *buffer, size_t count, int *result, int base) +{ + intmax_t parsed; + int err = parse_bounded_int(buffer, count, &parsed, base, INT_MIN, INT_MAX); + + *result = parsed; + return err; +} + +int parse_uint(const char *buffer, size_t count, unsigned int *result, int base) +{ + uintmax_t parsed; + int err = parse_bounded_uint(buffer, count, &parsed, base, UINT_MAX); + + *result = parsed; + return err; +} + +int parse_long(const char *buffer, size_t count, long *result, int base) +{ + intmax_t parsed; + int err = parse_bounded_int(buffer, count, &parsed, base, LONG_MIN, LONG_MAX); + + *result = parsed; + return err; +} + +int parse_ulong(const char *buffer, size_t count, unsigned long *result, int base) +{ + uintmax_t parsed; + int err = parse_bounded_uint(buffer, count, &parsed, base, ULONG_MAX); + + *result = parsed; + return err; +} + void *find_auxv_entry(int type, char *auxv) { ElfW(auxv_t) *p; @@ -224,9 +346,7 @@ int read_debugfs_int(const char *debugfs_file, int *result) if (err) return err; - *result = atoi(value); - - return 0; + return parse_int(value, sizeof(value), result, 10); } int write_debugfs_int(const char *debugfs_file, int result) -- cgit v1.2.3 From 5c20de57888f0962e25a0eeec1a59c98056fc42e Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 3 Feb 2023 11:39:46 +1100 Subject: selftests/powerpc: Add {read,write}_{long,ulong} Add helper functions to read and write (unsigned) long values directly from/to files. One of the kernel interfaces uses hex strings, so we need to allow passing a base too. 
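As a rough illustration only (not part of this patch), a test could use the new helpers along the following lines; the DSCR sysfs path and the bit twiddling are example assumptions, not something taken from the series:

#include "utils.h"	/* declares read_ulong() / write_ulong() added by this series */

static int bump_dscr_default(void)
{
	unsigned long dscr;
	int err;

	/* base 16: this particular sysfs file is assumed to be formatted as hex */
	err = read_ulong("/sys/devices/system/cpu/dscr_default", &dscr, 16);
	if (err)
		return err;

	/* write the modified value back, still as hex */
	return write_ulong("/sys/devices/system/cpu/dscr_default", dscr | 1, 16);
}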
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203003947.38033-5-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/dscr/dscr.h | 9 +-- .../selftests/powerpc/dscr/dscr_sysfs_test.c | 14 ++-- tools/testing/selftests/powerpc/include/utils.h | 4 ++ tools/testing/selftests/powerpc/pmu/lib.c | 9 +-- tools/testing/selftests/powerpc/utils.c | 81 ++++++++++++++++++++++ 5 files changed, 95 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h index aaa2b0d89f7e..2c54998d4715 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr.h +++ b/tools/testing/selftests/powerpc/dscr/dscr.h @@ -65,26 +65,21 @@ inline void set_dscr_usr(unsigned long val) unsigned long get_default_dscr(void) { int err; - char buf[16] = {0}; unsigned long val; - err = read_file(DSCR_DEFAULT, buf, sizeof(buf) - 1, NULL); + err = read_ulong(DSCR_DEFAULT, &val, 16); if (err) { perror("read() failed"); exit(1); } - sscanf(buf, "%lx", &val); return val; } void set_default_dscr(unsigned long val) { int err; - char buf[16]; - sprintf(buf, "%lx\n", val); - - err = write_file(DSCR_DEFAULT, buf, strlen(buf)); + err = write_ulong(DSCR_DEFAULT, val, 16); if (err) { perror("write() failed"); exit(1); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index c350f193830a..4f1fef6198fc 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -12,16 +12,16 @@ static int check_cpu_dscr_default(char *file, unsigned long val) { - char buf[10] = {0}; - int rc; + unsigned long cpu_dscr; + int err; - rc = read_file(file, buf, sizeof(buf) - 1, NULL); - if (rc) - return rc; + err = read_ulong(file, &cpu_dscr, 16); + if (err) + return err; - if (strtol(buf, NULL, 16) != val) { + if (cpu_dscr != val) { printf("DSCR match failed: %ld (system) %ld (cpu)\n", - val, strtol(buf, NULL, 16)); + val, cpu_dscr); return 1; } return 0; diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 7c1fa385824c..311ddc124ebc 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,10 @@ int parse_ulong(const char *buffer, size_t count, unsigned long *result, int bas int read_file(const char *path, char *buf, size_t count, size_t *len); int write_file(const char *path, const char *buf, size_t count); +int read_long(const char *path, long *result, int base); +int write_long(const char *path, long result, int base); +int read_ulong(const char *path, unsigned long *result, int base); +int write_ulong(const char *path, unsigned long result, int base); int read_debugfs_file(const char *debugfs_file, char *buf, size_t count); int write_debugfs_file(const char *debugfs_file, const char *buf, size_t count); int read_debugfs_int(const char *debugfs_file, int *result); diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index 1cfc13a25aee..719f94f10d41 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -192,15 +192,8 @@ bool require_paranoia_below(int level) { int err; long current; - char buf[16] = {0}; - err = read_file(PARANOID_PATH, buf, sizeof(buf) - 1, NULL); - if (err) { - printf("Couldn't read " PARANOID_PATH "?\n"); - return false; - } - - err = parse_long(buf, 
sizeof(buf), &current, 10); + err = read_long(PARANOID_PATH, &current, 10); if (err) { printf("Couldn't parse " PARANOID_PATH "?\n"); return false; } diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index ddfd871881bf..1017f1738619 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -245,6 +245,87 @@ int parse_ulong(const char *buffer, size_t count, unsigned long *result, int bas return err; } +int read_long(const char *path, long *result, int base) +{ + int err; + char buffer[32] = {0}; + + err = read_file(path, buffer, sizeof(buffer) - 1, NULL); + if (err) + return err; + + return parse_long(buffer, sizeof(buffer), result, base); +} + +int read_ulong(const char *path, unsigned long *result, int base) +{ + int err; + char buffer[32] = {0}; + + err = read_file(path, buffer, sizeof(buffer) - 1, NULL); + if (err) + return err; + + return parse_ulong(buffer, sizeof(buffer), result, base); +} + +int write_long(const char *path, long result, int base) +{ + int err; + int len; + char buffer[32]; + + /* Decimal only for now: no format specifier for signed hex values */ + if (base != 10) { + err = -EINVAL; + goto out; + } + + len = snprintf(buffer, sizeof(buffer), "%ld", result); + if (len < 0 || len >= sizeof(buffer)) { + err = -EOVERFLOW; + goto out; + } + + err = write_file(path, buffer, len); + +out: + errno = -err; + return err; +} + +int write_ulong(const char *path, unsigned long result, int base) +{ + int err; + int len; + char buffer[32]; + char *fmt; + + switch (base) { + case 10: + fmt = "%lu"; + break; + case 16: + fmt = "%lx"; + break; + default: + err = -EINVAL; + goto out; + } + + len = snprintf(buffer, sizeof(buffer), fmt, result); + if (len < 0 || len >= sizeof(buffer)) { + err = -errno; + goto out; + } + + err = write_file(path, buffer, len); + +out: + errno = -err; + return err; +} + void *find_auxv_entry(int type, char *auxv) { ElfW(auxv_t) *p; -- cgit v1.2.3 From 8d7253dc447473dfcf3f09fb0fa2bd6f7d05b43b Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 3 Feb 2023 11:39:47 +1100 Subject: selftests/powerpc: Add automatically allocating read_file A couple of tests roll their own auto-allocating file read logic. Add a generic implementation and convert them to use it.
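For illustration only (not taken from the patch), a caller might use the new helper roughly as below; the procfs path is just an example, and the caller owns the returned buffer:

#include <stdio.h>
#include <stdlib.h>
#include "utils.h"	/* declares read_file_alloc() added by this patch */

static int dump_rmo_buffer(void)
{
	char *buf = NULL;
	size_t len = 0;
	int err;

	/* works even for procfs files that report a zero st_size */
	err = read_file_alloc("/proc/ppc64/rtas/rmo_buffer", &buf, &len);
	if (err)
		return err;

	printf("read %zu bytes: %.*s\n", len, (int)len, buf);
	free(buf);	/* the helper allocates the buffer; the caller frees it */
	return 0;
}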
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203003947.38033-6-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/include/utils.h | 1 + .../testing/selftests/powerpc/nx-gzip/gzfht_test.c | 38 +--------- tools/testing/selftests/powerpc/syscalls/Makefile | 2 +- .../selftests/powerpc/syscalls/rtas_filter.c | 81 +++------------------- tools/testing/selftests/powerpc/utils.c | 58 ++++++++++++++++ 5 files changed, 71 insertions(+), 109 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 311ddc124ebc..eed7dd7582b2 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,7 @@ int parse_ulong(const char *buffer, size_t count, unsigned long *result, int bas int read_file(const char *path, char *buf, size_t count, size_t *len); int write_file(const char *path, const char *buf, size_t count); +int read_file_alloc(const char *path, char **buf, size_t *len); int read_long(const char *path, long *result, int base); int write_long(const char *path, long result, int base); int read_ulong(const char *path, unsigned long *result, int base); diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index fbc3d265155b..4de079923ccb 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -143,42 +143,6 @@ int gzip_header_blank(char *buf) return i; } -/* Caller must free the allocated buffer return nonzero on error. */ -int read_alloc_input_file(char *fname, char **buf, size_t *bufsize) -{ - int err; - struct stat statbuf; - char *p; - size_t num_bytes; - - if (stat(fname, &statbuf)) { - perror(fname); - return -1; - } - - assert(NULL != (p = (char *) malloc(statbuf.st_size))); - - err = read_file(fname, p, statbuf.st_size, &num_bytes); - if (err) { - perror(fname); - goto fail; - } - - if (num_bytes != statbuf.st_size) { - fprintf(stderr, "Actual bytes != expected bytes\n"); - err = -1; - goto fail; - } - - *buf = p; - *bufsize = num_bytes; - return 0; - -fail: - free(p); - return err; -} - /* * Z_SYNC_FLUSH as described in zlib.h. * Returns number of appended bytes @@ -245,7 +209,7 @@ int compress_file(int argc, char **argv, void *handle) fprintf(stderr, "usage: %s \n", argv[0]); exit(-1); } - if (read_alloc_input_file(argv[1], &inbuf, &inlen)) + if (read_file_alloc(argv[1], &inbuf, &inlen)) exit(-1); fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen); diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index b63f8459c704..54ff5cfffc63 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -6,4 +6,4 @@ CFLAGS += -I../../../../../usr/include top_srcdir = ../../../../.. 
include ../../lib.mk -$(TEST_GEN_PROGS): ../harness.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c index 03b487f18d00..9b17780f0b18 100644 --- a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c +++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -50,70 +51,16 @@ struct region { struct region *next; }; -int read_entire_file(int fd, char **buf, size_t *len) -{ - size_t buf_size = 0; - size_t off = 0; - int rc; - - *buf = NULL; - do { - buf_size += BLOCK_SIZE; - if (*buf == NULL) - *buf = malloc(buf_size); - else - *buf = realloc(*buf, buf_size); - - if (*buf == NULL) - return -ENOMEM; - - rc = read(fd, *buf + off, BLOCK_SIZE); - if (rc < 0) - return -EIO; - - off += rc; - } while (rc == BLOCK_SIZE); - - if (len) - *len = off; - - return 0; -} - -static int open_prop_file(const char *prop_path, const char *prop_name, int *fd) -{ - char *path; - int len; - - /* allocate enough for two string, a slash and trailing NULL */ - len = strlen(prop_path) + strlen(prop_name) + 1 + 1; - path = malloc(len); - if (path == NULL) - return -ENOMEM; - - snprintf(path, len, "%s/%s", prop_path, prop_name); - - *fd = open(path, O_RDONLY); - free(path); - if (*fd < 0) - return -errno; - - return 0; -} - static int get_property(const char *prop_path, const char *prop_name, char **prop_val, size_t *prop_len) { - int rc, fd; - - rc = open_prop_file(prop_path, prop_name, &fd); - if (rc) - return rc; + char path[PATH_MAX]; - rc = read_entire_file(fd, prop_val, prop_len); - close(fd); + int len = snprintf(path, sizeof(path), "%s/%s", prop_path, prop_name); + if (len < 0 || len >= sizeof(path)) + return -ENOMEM; - return rc; + return read_file_alloc(path, prop_val, prop_len); } int rtas_token(const char *call_name) @@ -138,22 +85,14 @@ err: static int read_kregion_bounds(struct region *kregion) { char *buf; - int fd; - int rc; + int err; - fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY); - if (fd < 0) { - printf("Could not open rmo_buffer file\n"); + err = read_file_alloc("/proc/ppc64/rtas/rmo_buffer", &buf, NULL); + if (err) { + perror("Could not open rmo_buffer file"); return RTAS_IO_ASSERT; } - rc = read_entire_file(fd, &buf, NULL); - close(fd); - if (rc) { - free(buf); - return rc; - } - sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size); free(buf); diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 1017f1738619..7c8cfedb012a 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -65,6 +65,64 @@ out: return err; } +int read_file_alloc(const char *path, char **buf, size_t *len) +{ + size_t read_offset = 0; + size_t buffer_len = 0; + char *buffer = NULL; + int err; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + + /* + * We don't use stat & preallocate st_size because some non-files + * report 0 file size. Instead just dynamically grow the buffer + * as needed. + */ + while (1) { + ssize_t rc; + + if (read_offset >= buffer_len / 2) { + char *next_buffer; + + buffer_len = buffer_len ? 
buffer_len * 2 : 4096; + next_buffer = realloc(buffer, buffer_len); + if (!next_buffer) { + err = -errno; + goto out; + } + buffer = next_buffer; + } + + rc = read(fd, buffer + read_offset, buffer_len - read_offset); + if (rc < 0) { + err = -errno; + goto out; + } + + if (rc == 0) + break; + + read_offset += rc; + } + + *buf = buffer; + if (len) + *len = read_offset; + + err = 0; + +out: + close(fd); + if (err) + free(buffer); + errno = -err; + return err; +} + int write_file(const char *path, const char *buf, size_t count) { int fd; -- cgit v1.2.3 From 2115732e548304e52ca1bbdb714f45f4a2461653 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 1 Feb 2023 15:29:33 +0530 Subject: powerpc/mce: log the error for all unrecoverable errors For all unrecoverable errors we fail to log the error, since machine_check_log_err() is not called for unrecoverable errors. machine_check_log_err() is called from the deferred handler; to run deferred handlers we have to raise irq work from the exception handler. For recoverable errors the exception vector code takes care of running deferred handlers. For unrecoverable errors, raise irq work in save_mce_event(), so that we log the error from the MCE deferred handler. Log without this change: MCE: CPU27: machine check (Severe) Real address Load/Store (foreign/control memory) [Not recovered] MCE: CPU27: PID: 10580 Comm: inject-ra-err NIP: [0000000010000df4] MCE: CPU27: Initiator CPU MCE: CPU27: Unknown Log with this change: MCE: CPU24: machine check (Severe) Real address Load/Store (foreign/control memory) [Not recovered] MCE: CPU24: PID: 1589811 Comm: inject-ra-err NIP: [0000000010000e48] MCE: CPU24: Initiator CPU MCE: CPU24: Unknown RTAS: event: 5, Type: Platform Error (224), Severity: 3 Signed-off-by: Ganesh Goudar Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230201095933.129482-1-ganeshgr@linux.ibm.com --- arch/powerpc/kernel/mce.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 6c5d30fba766..219f28637a3e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -131,6 +131,13 @@ void save_mce_event(struct pt_regs *regs, long handled, if (mce->error_type == MCE_ERROR_TYPE_UE) mce->u.ue_error.ignore_event = mce_err->ignore_event; + /* + * Raise irq work, So that we don't miss to log the error for + * unrecoverable errors. + */ + if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED) + mce_irq_work_queue(); + if (!addr) return; @@ -233,9 +240,6 @@ static void machine_check_ue_event(struct machine_check_event *evt) } memcpy(&local_paca->mce_info->mce_ue_event_queue[index], evt, sizeof(*evt)); - - /* Queue work to process this event later. */ - mce_irq_work_queue(); } /* -- cgit v1.2.3 From d9ab6da64fd15608c9feb20d769d8df1a32fe212 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:23 +0100 Subject: powerpc: Remove __kernel_text_address() in show_instructions() That test was introduced in 2006 by commit 00ae36de49cc ("[POWERPC] Better check in show_instructions"). At that time, there were no BPF progs. As seen in the message of commit 89d21e259a94 ("powerpc/bpf/32: Fix Oops on tail call tests"), when a page fault occurs in test_bpf.ko for instance, the code is dumped as XXXXXXXXs. Although __kernel_text_address() checks is_bpf_text_address(), it seems it is not enough.
Today, show_instructions() uses get_kernel_nofault() to read the code, so there is no real need for additional verifications. ARM64 and x86 don't do any additional check before dumping instructions. Do the same and remove __kernel_text_address() in show_instructions(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4fd69ef7945518c3e27f96b95046a5c1468d35bf.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c22cc234672f..effe9697905d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1405,8 +1405,7 @@ static void show_instructions(struct pt_regs *regs) for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; - if (!__kernel_text_address(pc) || - get_kernel_nofault(instr, (const void *)pc)) { + if (get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (nip == pc) -- cgit v1.2.3 From 6376ed8feca829039d31a208216b958f0e439d87 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:24 +0100 Subject: powerpc/bpf/32: No need to zeroise r4 when not doing tail call r4 is cleared at function entry and used as the tail call count. But when the function does not perform a tail call, r4 is ignored, so there is no need to clear it. Replace it by a NOP in that case. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9c5440b2b6d90a78600257433ac499b5c5101fbb.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index a379b0ce19ff..4e6caee9c98a 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -114,7 +114,10 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ - EMIT(PPC_RAW_LI(_R4, 0)); + if (ctx->seen & SEEN_TAILCALL) + EMIT(PPC_RAW_LI(_R4, 0)); + else + EMIT(PPC_RAW_NOP()); #define BPF_TAILCALL_PROLOGUE_SIZE 4 -- cgit v1.2.3 From d084dcf256bc4565b4b1af9b00297ac7b51c7049 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:25 +0100 Subject: powerpc/bpf/32: Only set a stack frame when necessary Until now a stack frame was set up at all times due to the need to keep the tail call counter on the stack. But since commit 89d21e259a94 ("powerpc/bpf/32: Fix Oops on tail call tests") the tail call counter is passed via register r4. It is therefore no longer necessary to have a stack frame for that. Just like PPC64, implement bpf_has_stack_frame() and only set up the frame when needed. The difference from PPC64 is that PPC32 doesn't have a redzone, so the stack frame is required as soon as non-volatile registers are used or when the tail call count is set up.
Signed-off-by: Christophe Leroy [mpe: Fix commit reference in change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/62d7b654a3cfe73d998697cb29bbc5ffd89bfdb1.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 4e6caee9c98a..7f54d37bede6 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -79,6 +79,20 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) #define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */ #define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */ +static inline bool bpf_has_stack_frame(struct codegen_context *ctx) +{ + /* + * We only need a stack frame if: + * - we call other functions (kernel helpers), or + * - we use non volatile registers, or + * - we use tail call counter + * - the bpf program uses its stack area + * The latter condition is deduced from the usage of BPF_REG_FP + */ + return ctx->seen & (SEEN_FUNC | SEEN_TAILCALL | SEEN_NVREG_FULL_MASK) || + bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)); +} + void bpf_jit_realloc_regs(struct codegen_context *ctx) { unsigned int nvreg_mask; @@ -121,7 +135,8 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) #define BPF_TAILCALL_PROLOGUE_SIZE 4 - EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); + if (bpf_has_stack_frame(ctx)) + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_TAILCALL) EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); @@ -174,7 +189,8 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); /* Tear down our stack frame */ - EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); + if (bpf_has_stack_frame(ctx)) + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_MTLR(_R0)); -- cgit v1.2.3 From 7dd0e2848764306d7a70943b97584ffdc7754708 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:26 +0100 Subject: powerpc/bpf/32: BPF prog is never called with more than one arg BPF progs are never called with more than one argument, plus the tail call count as a second argument when needed. So, no need to retrieve 9th and 10th argument (5th 64 bits argument) from the stack in prologue. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/89a200fb45048601475c092c5775294dee3886de.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 7f54d37bede6..7c129fe810f5 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -159,12 +159,6 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (bpf_is_seen_register(ctx, i)) EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); - /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ - if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); - } - /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) { EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0)); -- cgit v1.2.3 From 85e031154c7c14edee0705532a9ffc8a2fe591d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:27 +0100 Subject: powerpc/bpf: Perform complete extra passes to update addresses BPF core calls the jit compiler again for an extra pass in order to properly set subprog addresses. Unlike other architectures, powerpc only updates the addresses during that extra pass. It means that holes must have been left in the code in order to enable the maximum possible instruction size. In order to avoid waste of space, and waste of CPU time on powerpc processors on which the NOP instruction is not 0-cycle, perform two real additional passes. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d484a4ac95949ff55fc4344b674e7c0d3ddbfcd5.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit.h | 2 +- arch/powerpc/net/bpf_jit_comp.c | 91 ++------------------------------------- arch/powerpc/net/bpf_jit_comp32.c | 4 +- arch/powerpc/net/bpf_jit_comp64.c | 4 +- 4 files changed, 8 insertions(+), 93 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index a4f7880f959d..d767e39d5645 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -169,7 +169,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) void bpf_jit_init_reg_mapping(struct codegen_context *ctx); int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, int pass); + u32 *addrs, int pass, bool extra_pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 43e634126514..e93aefcfb83f 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -23,74 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size) memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */ -static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) -{ - const struct bpf_insn *insn = fp->insnsi; - bool 
func_addr_fixed; - u64 func_addr; - u32 tmp_idx; - int i, j, ret; - - for (i = 0; i < fp->len; i++) { - /* - * During the extra pass, only the branch target addresses for - * the subprog calls need to be fixed. All other instructions - * can left untouched. - * - * The JITed image length does not change because we already - * ensure that the JITed instruction sequence for these calls - * are of fixed length by padding them with NOPs. - */ - if (insn[i].code == (BPF_JMP | BPF_CALL) && - insn[i].src_reg == BPF_PSEUDO_CALL) { - ret = bpf_jit_get_func_addr(fp, &insn[i], true, - &func_addr, - &func_addr_fixed); - if (ret < 0) - return ret; - - /* - * Save ctx->idx as this would currently point to the - * end of the JITed image and set it to the offset of - * the instruction sequence corresponding to the - * subprog call temporarily. - */ - tmp_idx = ctx->idx; - ctx->idx = addrs[i] / 4; - ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); - if (ret) - return ret; - - /* - * Restore ctx->idx here. This is safe as the length - * of the JITed sequence remains unchanged. - */ - ctx->idx = tmp_idx; - } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) { - tmp_idx = ctx->idx; - ctx->idx = addrs[i] / 4; -#ifdef CONFIG_PPC32 - PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm); - PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm); - for (j = ctx->idx - addrs[i] / 4; j < 4; j++) - EMIT(PPC_RAW_NOP()); -#else - func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); - PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr); - /* overwrite rest with nops */ - for (j = ctx->idx - addrs[i] / 4; j < 5; j++) - EMIT(PPC_RAW_NOP()); -#endif - ctx->idx = tmp_idx; - i++; - } - } - - return 0; -} - int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) { if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { @@ -185,7 +117,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) { /* We hit something illegal or unsupported. */ fp = org_fp; goto out_addrs; @@ -200,7 +132,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) { fp = org_fp; goto out_addrs; } @@ -234,29 +166,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); - if (extra_pass) { - /* - * Do not touch the prologue and epilogue as they will remain - * unchanged. Only fix the branch target address for subprog - * calls in the body, and ldimm64 instructions. - * - * This does not change the offsets and lengths of the subprog - * call instruction sequences and hence, the size of the JITed - * image as well. - */ - bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs); - - /* There is no need to perform the usual passes. */ - goto skip_codegen_passes; - } - /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. 
*/ cgctx.idx = 0; cgctx.alt_exit_addr = 0; bpf_jit_build_prologue(code_base, &cgctx); - if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { + if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) { bpf_jit_binary_free(bpf_hdr); fp = org_fp; goto out_addrs; @@ -268,7 +184,6 @@ skip_init_ctx: proglen - (cgctx.idx * 4), cgctx.seen); } -skip_codegen_passes: if (bpf_jit_enable > 1) /* * Note that we output the base address of the code_base diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 7c129fe810f5..20493b851248 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -282,7 +282,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, int pass) + u32 *addrs, int pass, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -1002,7 +1002,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], false, + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) return ret; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 29ee306d6302..6298c1483081 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -343,7 +343,7 @@ asm ( /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, int pass) + u32 *addrs, int pass, bool extra_pass) { enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; @@ -967,7 +967,7 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], false, + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) return ret; -- cgit v1.2.3 From d3921cbb6cd663193cecf04f0b170a30c6d0e390 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:28 +0100 Subject: powerpc/bpf: Only pad length-variable code at initial pass Now that two real additional passes are performed in case of extra pass requested by BPF core, padding is not needed anymore except during initial pass done before memory allocation to count maximum possible program size. So, only do the padding when 'image' is NULL. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/921851d6577badc1e6b08b270a0ced80a6a26d03.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 8 +++----- arch/powerpc/net/bpf_jit_comp64.c | 12 +++++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 20493b851248..c3bc20b91cdc 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -206,9 +206,6 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func if (image && rel < 0x2000000 && rel >= -0x2000000) { PPC_BL(func); - EMIT(PPC_RAW_NOP()); - EMIT(PPC_RAW_NOP()); - EMIT(PPC_RAW_NOP()); } else { /* Load function address into r0 */ EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); @@ -973,8 +970,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); PPC_LI32(dst_reg, (u32)insn[i].imm); /* padding to allow full 4 instructions for later patching */ - for (j = ctx->idx - tmp_idx; j < 4; j++) - EMIT(PPC_RAW_NOP()); + if (!image) + for (j = ctx->idx - tmp_idx; j < 4; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 6298c1483081..8dd3cabaa83a 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -240,13 +240,14 @@ int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func * load the callee's address, but this may optimize the number of * instructions required based on the nature of the address. * - * Since we don't want the number of instructions emitted to change, + * Since we don't want the number of instructions emitted to increase, * we pad the optimized PPC_LI64() call with NOPs to guarantee that * we always have a five-instruction sequence, which is the maximum * that PPC_LI64() can emit. */ - for (i = ctx->idx - ctx_idx; i < 5; i++) - EMIT(PPC_RAW_NOP()); + if (!image) + for (i = ctx->idx - ctx_idx; i < 5; i++) + EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); @@ -938,8 +939,9 @@ emit_clear: tmp_idx = ctx->idx; PPC_LI64(dst_reg, imm64); /* padding to allow full 5 instructions for later patching */ - for (j = ctx->idx - tmp_idx; j < 5; j++) - EMIT(PPC_RAW_NOP()); + if (!image) + for (j = ctx->idx - tmp_idx; j < 5; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; -- cgit v1.2.3 From 8616045fe785229b53a24b8698631826298d1500 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:29 +0100 Subject: powerpc/bpf/32: Optimise some particular const operations Simplify multiplications and divisions with constants when the constant is 1 or -1. When the constant is a power of 2, replace them with bit shifts.
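For illustration, the optimisation rests on the usual strength-reduction identities for unsigned 32-bit values: multiplying or dividing by a power-of-two constant is the same as shifting by ilog2() of that constant, which is what the emitted slwi/srwi instructions do. A minimal user-space sketch of the identity (not JIT code; ilog2_u32() below merely stands in for the kernel's ilog2()):

#include <assert.h>
#include <stdint.h>

/* Stand-in for the kernel's ilog2() on a 32-bit power of two. */
static unsigned int ilog2_u32(uint32_t v)
{
	unsigned int n = 0;

	while (v >>= 1)
		n++;
	return n;
}

int main(void)
{
	uint32_t x = 0x12345678, k = 16;	/* k is a power of two */

	/* (u32) dst *= k  becomes  slwi dst, dst, ilog2(k) */
	assert((uint32_t)(x * k) == (uint32_t)(x << ilog2_u32(k)));
	/* (u32) dst /= k  becomes  srwi dst, dst, ilog2(k) */
	assert(x / k == x >> ilog2_u32(k));
	return 0;
}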
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e53b1f4a4150ec6cabcaeeef82bf9c361b5f9204.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index c3bc20b91cdc..b1d6ed4d8270 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -391,7 +391,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ - if (imm >= -32768 && imm < 32768) { + if (imm == 1) + break; + if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + } else if (is_power_of_2((u32)imm)) { + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, ilog2(imm))); + } else if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); } else { PPC_LI32(_R0, imm); @@ -411,6 +417,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); break; } + if (imm > 0 && is_power_of_2(imm)) { + imm = ilog2(imm); + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); + break; + } bpf_set_seen_register(ctx, tmp_reg); PPC_LI32(tmp_reg, imm); EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); @@ -438,8 +451,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm == 1) break; - PPC_LI32(_R0, imm); - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0)); + if (is_power_of_2((u32)imm)) { + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, ilog2(imm))); + } else { + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0)); + } break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ if (!imm) -- cgit v1.2.3 From c88da29b4d2ce8d0070646b8f99729e9b355a4bf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:30 +0100 Subject: powerpc/bpf/32: introduce a second source register for ALU operations At the time being, all ALU operations are performed with the same source and destination, requiring the source to be moved into the destination via a separate register move, like: 70: 7f c6 f3 78 mr r6,r30 74: 7f a5 eb 78 mr r5,r29 78: 30 c6 ff f4 addic r6,r6,-12 7c: 7c a5 01 d4 addme r5,r5 Introduce a second source register to all ALU operations. For the time being that second source register is made equal to the destination register.
That change will allow, via following patch, to optimise the generated code as: 70: 30 de ff f4 addic r6,r30,-12 74: 7c bd 01 d4 addme r5,r29 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d5aaaba50d9d6b4a0e9f0cd4a5e34101aca1e247.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 350 ++++++++++++++++++++------------------ 1 file changed, 183 insertions(+), 167 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index b1d6ed4d8270..5d36ff7a0a8b 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -294,6 +294,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 dst_reg_h = dst_reg - 1; u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; + u32 src2_reg = dst_reg; + u32 src2_reg_h = dst_reg_h; u32 ax_reg = bpf_to_ppc(BPF_REG_AX); u32 tmp_reg = bpf_to_ppc(TMP_REG); u32 size = BPF_SIZE(code); @@ -338,108 +340,111 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG */ case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADD(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ - EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h)); + EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, src_reg)); + EMIT(PPC_RAW_ADDE(dst_reg_h, src2_reg_h, src_reg_h)); break; case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SUB(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ - EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg)); - EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h)); + EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, src2_reg)); + EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, src2_reg_h)); break; case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ imm = -imm; fallthrough; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ - if (IMM_HA(imm) & 0xffff) - EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm))); + if (!imm) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + } else if (IMM_HA(imm) & 0xffff) { + EMIT(PPC_RAW_ADDIS(dst_reg, src2_reg, IMM_HA(imm))); + src2_reg = dst_reg; + } if (IMM_L(imm)) - EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + EMIT(PPC_RAW_ADDI(dst_reg, src2_reg, IMM_L(imm))); break; case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ imm = -imm; fallthrough; case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ - if (!imm) + if (!imm) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h)); break; - + } if (imm >= -32768 && imm < 32768) { - EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); + EMIT(PPC_RAW_ADDIC(dst_reg, src2_reg, imm)); } else { PPC_LI32(_R0, imm); - EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, _R0)); } if (imm >= 0 || (BPF_OP(code) == BPF_SUB && imm == 0x80000000)) - EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); + EMIT(PPC_RAW_ADDZE(dst_reg_h, src2_reg_h)); else - EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h)); + EMIT(PPC_RAW_ADDME(dst_reg_h, src2_reg_h)); break; case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_MULW(_R0, dst_reg, src_reg_h)); - EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); - EMIT(PPC_RAW_MULHWU(tmp_reg, 
dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(_R0, src2_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, src_reg)); + EMIT(PPC_RAW_MULHWU(tmp_reg, src2_reg, src_reg)); + EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg)); EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); break; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg)); break; case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ - if (imm == 1) - break; - if (imm == -1) { - EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + if (imm == 1) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + } else if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0)); } else if (is_power_of_2((u32)imm)) { - EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, ilog2(imm))); + EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, ilog2(imm))); } else if (imm >= -32768 && imm < 32768) { - EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); + EMIT(PPC_RAW_MULI(dst_reg, src2_reg, imm)); } else { PPC_LI32(_R0, imm); - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_MULW(dst_reg, src2_reg, _R0)); } break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ if (!imm) { PPC_LI32(dst_reg, 0); PPC_LI32(dst_reg_h, 0); - break; - } - if (imm == 1) - break; - if (imm == -1) { - EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); - EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); - break; - } - if (imm > 0 && is_power_of_2(imm)) { + } else if (imm == 1) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h)); + } else if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h)); + } else if (imm > 0 && is_power_of_2(imm)) { imm = ilog2(imm); - EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm)); EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); - EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); - break; + EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm)); + } else { + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, tmp_reg)); + if (imm < 0) + EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, src2_reg)); + EMIT(PPC_RAW_MULHWU(_R0, src2_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(dst_reg, src2_reg, tmp_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); } - bpf_set_seen_register(ctx, tmp_reg); - PPC_LI32(tmp_reg, imm); - EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); - if (imm < 0) - EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); - EMIT(PPC_RAW_MULHWU(_R0, dst_reg, tmp_reg)); - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); - EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); break; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ - EMIT(PPC_RAW_DIVWU(_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg)); EMIT(PPC_RAW_MULW(_R0, src_reg, _R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0)); break; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ return -EOPNOTSUPP; @@ -448,14 +453,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ if (!imm) return -EINVAL; - if (imm == 1) - break; - - if 
(is_power_of_2((u32)imm)) { - EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, ilog2(imm))); + if (imm == 1) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + } else if (is_power_of_2((u32)imm)) { + EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm))); } else { PPC_LI32(_R0, imm); - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0)); } break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ @@ -465,16 +469,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (!is_power_of_2((u32)imm)) { bpf_set_seen_register(ctx, tmp_reg); PPC_LI32(tmp_reg, imm); - EMIT(PPC_RAW_DIVWU(_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg)); EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); - break; - } - if (imm == 1) + EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0)); + } else if (imm == 1) { EMIT(PPC_RAW_LI(dst_reg, 0)); - else - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31)); - + } else { + imm = ilog2((u32)imm); + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - imm, 31)); + } break; case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ if (!imm) @@ -486,7 +489,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm == 1) EMIT(PPC_RAW_LI(dst_reg, 0)); else - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - ilog2(imm), 31)); EMIT(PPC_RAW_LI(dst_reg_h, 0)); break; case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ @@ -496,34 +499,38 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * return -EOPNOTSUPP; if (imm < 0) { - EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); - EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h)); imm = -imm; + src2_reg = dst_reg; + } + if (imm == 1) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h)); + } else { + imm = ilog2(imm); + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm)); } - if (imm == 1) - break; - imm = ilog2(imm); - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); - EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); - EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); break; case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ - EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); + EMIT(PPC_RAW_NEG(dst_reg, src2_reg)); break; case BPF_ALU64 | BPF_NEG: /* dst = -dst */ - EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); - EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h)); break; /* * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH */ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ - EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h)); + EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg)); + EMIT(PPC_RAW_AND(dst_reg_h, src2_reg_h, src_reg_h)); break; case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ - EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ if (imm >= 0) @@ -531,23 +538,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * fallthrough; case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ if 
(!IMM_H(imm)) { - EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); + EMIT(PPC_RAW_ANDI(dst_reg, src2_reg, IMM_L(imm))); } else if (!IMM_L(imm)) { - EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm))); + EMIT(PPC_RAW_ANDIS(dst_reg, src2_reg, IMM_H(imm))); } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(i) - 1)) - 1))) { - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - fls(imm), 32 - ffs(imm))); } else { PPC_LI32(_R0, imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_AND(dst_reg, src2_reg, _R0)); } break; case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ - EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h)); + EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, src2_reg_h, src_reg_h)); break; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ - EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ /* Sign-extended */ @@ -555,145 +562,154 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_LI(dst_reg_h, -1)); fallthrough; case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ - if (IMM_L(imm)) - EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_L(imm)) { + EMIT(PPC_RAW_ORI(dst_reg, src2_reg, IMM_L(imm))); + src2_reg = dst_reg; + } if (IMM_H(imm)) - EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); + EMIT(PPC_RAW_ORIS(dst_reg, src2_reg, IMM_H(imm))); break; case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ if (dst_reg == src_reg) { EMIT(PPC_RAW_LI(dst_reg, 0)); EMIT(PPC_RAW_LI(dst_reg_h, 0)); } else { - EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h)); + EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg)); + EMIT(PPC_RAW_XOR(dst_reg_h, src2_reg_h, src_reg_h)); } break; case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ if (dst_reg == src_reg) EMIT(PPC_RAW_LI(dst_reg, 0)); else - EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0) - EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h)); + EMIT(PPC_RAW_NOR(dst_reg_h, src2_reg_h, src2_reg_h)); fallthrough; case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ - if (IMM_L(imm)) - EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_L(imm)) { + EMIT(PPC_RAW_XORI(dst_reg, src2_reg, IMM_L(imm))); + src2_reg = dst_reg; + } if (IMM_H(imm)) - EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); + EMIT(PPC_RAW_XORIS(dst_reg, src2_reg, IMM_H(imm))); break; case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ - EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ bpf_set_seen_register(ctx, tmp_reg); EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); - EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SLW(dst_reg_h, src2_reg_h, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SRW(_R0, dst_reg, _R0)); - EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_SRW(_R0, src2_reg, _R0)); + EMIT(PPC_RAW_SLW(tmp_reg, src2_reg, tmp_reg)); EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0)); - EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg)); EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); break; case BPF_ALU | 
BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */ - if (!imm) - break; - EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); + if (imm) + EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm)); + else + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */ if (imm < 0) return -EINVAL; - if (!imm) - break; - if (imm < 32) { - EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); - EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm)); - break; - } - if (imm < 64) - EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm)); - else + if (!imm) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + } else if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWIMI(dst_reg_h, src2_reg, imm, 32 - imm, 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, imm, 0, 31 - imm)); + } else if (imm < 64) { + EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg, imm, 0, 31 - imm)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + } else { EMIT(PPC_RAW_LI(dst_reg_h, 0)); - EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + } break; case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ - EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); - EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); + EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0)); EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); - EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SRW(dst_reg_h, src2_reg_h, src_reg)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ - if (!imm) - break; - EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); + if (imm) + EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, imm)); + else + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ if (imm < 0) return -EINVAL; - if (!imm) - break; - if (imm < 32) { - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); - EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); - EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31)); - break; - } - if (imm < 64) - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31)); - else + if (!imm) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h)); + } else if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, 32 - imm, imm, 31)); + } else if (imm < 64) { + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg_h, 64 - imm, imm - 32, 31)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else { EMIT(PPC_RAW_LI(dst_reg, 0)); - EMIT(PPC_RAW_LI(dst_reg_h, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } break; case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ - EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SRAW(dst_reg, src2_reg, src_reg)); break; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); - EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); + EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg)); 
+ EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26)); - EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); - EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SRAW(tmp_reg, src2_reg_h, tmp_reg)); + EMIT(PPC_RAW_SRAW(dst_reg_h, src2_reg_h, src_reg)); EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ - if (!imm) - break; - EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); + if (imm) + EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg, imm)); + else + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); break; case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ if (imm < 0) return -EINVAL; - if (!imm) - break; - if (imm < 32) { - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); - EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); - EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); - break; + if (!imm) { + EMIT(PPC_RAW_MR(dst_reg, src2_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h)); + } else if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm)); + } else if (imm < 64) { + EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, imm - 32)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31)); + } else { + EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, 31)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31)); } - if (imm < 64) - EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32)); - else - EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31)); - EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31)); break; /* @@ -727,7 +743,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * switch (imm) { case 16: /* Copy 16 bits to upper part */ - EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15)); + EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg, 16, 0, 15)); /* Rotate 8 bits right & mask */ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31)); break; @@ -737,23 +753,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(_R0, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, src2_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 16, 23)); EMIT(PPC_RAW_MR(dst_reg, _R0)); break; case 64: bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); - EMIT(PPC_RAW_RLWINM(_R0, dst_reg_h, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(tmp_reg, src2_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, src2_reg_h, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); - EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 16, 23)); EMIT(PPC_RAW_MR(dst_reg, _R0)); EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); break; @@ -763,7 
+779,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * switch (imm) { case 16: /* zero-extend 16 bits into 32 bits */ - EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 16, 31)); break; case 32: case 64: -- cgit v1.2.3 From 19daf0aef84f33bde9c742ed41b4ded567b8dfbf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Feb 2023 11:04:31 +0100 Subject: powerpc/bpf/32: perform three operands ALU operations When an ALU instruction is preceded by a MOV instruction that just moves a source register into the destination register of the ALU, replace that MOV+ALU pair with a single ALU operation taking the source of the MOV as second source instead of using its destination. Before the change, code could look like the following, with superfluous separate register move (mr) instructions. 70: 7f c6 f3 78 mr r6,r30 74: 7f a5 eb 78 mr r5,r29 78: 30 c6 ff f4 addic r6,r6,-12 7c: 7c a5 01 d4 addme r5,r5 With this commit, addition instructions take r30 and r29 directly. 70: 30 de ff f4 addic r6,r30,-12 74: 7c bd 01 d4 addme r5,r29 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b6719beaf01f9dcbcdbb787ef67c4a2f8e3a4cb6.1675245773.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 5d36ff7a0a8b..7f91ea064c08 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -290,6 +290,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; + u32 prevcode = i ? insn[i - 1].code : 0; u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); u32 dst_reg_h = dst_reg - 1; u32 src_reg = bpf_to_ppc(insn[i].src_reg); @@ -308,6 +309,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 tmp_idx; int j; + if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) && + (BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) && + BPF_OP(prevcode) == BPF_MOV && BPF_SRC(prevcode) == BPF_X && + insn[i - 1].dst_reg == insn[i].dst_reg && insn[i - 1].imm != 1) { + src2_reg = bpf_to_ppc(insn[i - 1].src_reg); + src2_reg_h = src2_reg - 1; + ctx->idx = addrs[i - 1] / 4; + } + /* * addrs[] maps a BPF bytecode address into a real offset from * the start of the body code. -- cgit v1.2.3 From 60bd7936f99fd8cdbeca67180f80ea13d8b97a76 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 1 Feb 2023 00:18:04 +0530 Subject: powerpc/hv-24x7: Fix pvr check when setting interface version Commit ec3eb9d941a9 ("powerpc/perf: Use PVR rather than oprofile field to determine CPU version") added usage of the pvr value instead of the oprofile field to determine the platform. In the hv-24x7 pmu driver code, the pvr check uses PVR_POWER8 when assigning the interface version for the power8 platform. But power8 can also have other pvr values like PVR_POWER8E and PVR_POWER8NVL. Hence the interface version won't be set properly in case of PVR_POWER8E and PVR_POWER8NVL. Fix this issue by adding the checks for PVR_POWER8E and PVR_POWER8NVL as well.
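For reference, PVR_VER() extracts the 16-bit version field from the processor version register, so the fix simply treats every POWER8 family member the same way. A rough sketch of the widened check (the numeric values are assumptions for illustration only; the authoritative macros live in arch/powerpc/include/asm/reg.h):

/* Illustrative values only -- see asm/reg.h for the real definitions. */
#define PVR_VER(pvr)	(((pvr) >> 16) & 0xFFFF)
#define PVR_POWER8E	0x004B
#define PVR_POWER8NVL	0x004C
#define PVR_POWER8	0x004D

static int is_power8_family(unsigned int pvr)
{
	switch (PVR_VER(pvr)) {
	case PVR_POWER8:
	case PVR_POWER8E:
	case PVR_POWER8NVL:
		return 1;	/* all three want interface_version = 1 */
	default:
		return 0;
	}
}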
Fixes: ec3eb9d941a9 ("powerpc/perf: Use PVR rather than oprofile field to determine CPU version") Reported-by: Sachin Sant Signed-off-by: Kajol Jain Tested-by: Sachin Sant Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230131184804.220756-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-24x7.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 33c23225fd54..8ce56837961e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1727,7 +1727,8 @@ static int hv_24x7_init(void) } /* POWER8 only supports v1, while POWER9 only supports v2. */ - if (PVR_VER(pvr) == PVR_POWER8) + if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || + PVR_VER(pvr) == PVR_POWER8NVL) interface_version = 1; else { interface_version = 2; -- cgit v1.2.3 From fb3b72a3f483f81a33a9693ed03dc62158a6f77e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 21 Jan 2023 19:58:03 +1000 Subject: powerpc: Consolidate 32-bit and 64-bit interrupt_enter_prepare There are two separate implementations for 32-bit and 64-bit which mostly do the same thing. Consolidating on one implementation ends up being smaller and simpler; only the irq soft-mask reconciliation is specific to 64-bit. There should be no real functional change with this patch, but it does add the context tracking calls needed for 32-bit to support context tracking. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230121095805.2823731-2-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 6d8492b6e2b8..a4196ab1d016 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -74,17 +74,18 @@ #include #include -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG /* * WARN/BUG is handled with a program interrupt so minimise checks here to * avoid recursion and maximise the chance of getting the first oops handled. */ #define INT_SOFT_MASK_BUG_ON(regs, cond) \ do { \ - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && \ - (user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \ + if ((user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \ BUG_ON(cond); \ } while (0) +#else +#define INT_SOFT_MASK_BUG_ON(regs, cond) #endif #ifdef CONFIG_PPC_BOOK3S_64 @@ -151,28 +152,8 @@ static inline void booke_restore_dbcr0(void) static inline void interrupt_enter_prepare(struct pt_regs *regs) { -#ifdef CONFIG_PPC32 - if (!arch_irq_disabled_regs(regs)) - trace_hardirqs_off(); - - if (user_mode(regs)) - kuap_lock(); - else - kuap_save_and_lock(regs); - - if (user_mode(regs)) - account_cpu_user_entry(); -#endif - #ifdef CONFIG_PPC64 - bool trace_enable = false; - - if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) { - if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) - trace_enable = true; - } else { - irq_soft_mask_set(IRQS_ALL_DISABLED); - } + irq_soft_mask_set(IRQS_ALL_DISABLED); /* * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
@@ -188,9 +169,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs) } else { __hard_RI_enable(); } + /* Enable MSR[RI] early, to support kernel SLB and hash faults */ +#endif - /* Do this when RI=1 because it can cause SLB faults */ - if (trace_enable) + if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); if (user_mode(regs)) { @@ -215,7 +197,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs) } INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); -#endif booke_restore_dbcr0(); } -- cgit v1.2.3 From 5c4b710a8157ec271fac4806562ee1aa1b44a53d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 21 Jan 2023 19:58:04 +1000 Subject: powerpc/32: implement HAVE_CONTEXT_TRACKING_USER support Context tracking involves tracking user, kernel, guest switches. 32-bit shares interrupt and syscall entry and exit code (and context tracking calls) with 64-bit, and KVM cannot be selected if CONTEXT_TRACKING_USER is enabled, so context tracking can be enabled for 32-bit. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230121095805.2823731-3-npiggin@gmail.com --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7dcb011cebf2..b4a586123853 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -207,7 +207,7 @@ config PPC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS - select HAVE_CONTEXT_TRACKING_USER if PPC64 + select HAVE_CONTEXT_TRACKING_USER select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW -- cgit v1.2.3 From 01f135506e2ed0403512c2467bd50746bdbd576d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 21 Jan 2023 19:58:05 +1000 Subject: powerpc/32: select HAVE_VIRT_CPU_ACCOUNTING_GEN cputime_t is no longer a type, so VIRT_CPU_ACCOUNTING_GEN does not have any effect on the type for 32-bit architectures, so there is no reason it can't be supported. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230121095805.2823731-4-npiggin@gmail.com --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b4a586123853..24e023da4d2e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -257,6 +257,7 @@ config PPC select HAVE_STATIC_CALL if PPC32 select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_VIRT_CPU_ACCOUNTING_GEN select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE select IOMMU_HELPER if PPC64 select IRQ_DOMAIN -- cgit v1.2.3 From e64e71056f323a1e178dccf04d4c0f032d84436c Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 20 Jan 2023 10:32:15 +0100 Subject: powerpc/powernv/ioda: Skip unallocated resources when mapping to PE pnv_ioda_setup_pe_res() calls opal to map a resource with a PE. However, the code assumes the resource is allocated and it uses the resource address to find out the segment(s) which need to be mapped to the PE.
In the unlikely case where the resource hasn't been allocated, the computation for the segment number is garbage, which can lead to invalid memory access and potentially a kernel crash, such as: [ ] pci_bus 0002:02: Configuring PE for bus [ ] pci 0002:02 : [PE# fc] Secondary bus 0x0000000000000002..0x0000000000000002 associated with PE#fc [ ] BUG: Kernel NULL pointer dereference on write at 0x00000000 [ ] Faulting instruction address: 0xc00000000005eac4 [ ] Oops: Kernel access of bad area, sig: 7 [#1] [ ] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV [ ] Modules linked in: [ ] CPU: 12 PID: 1 Comm: swapper/20 Not tainted 5.10.50-openpower1 #2 [ ] NIP: c00000000005eac4 LR: c00000000005ea44 CTR: 0000000030061b9c [ ] REGS: c000200007383650 TRAP: 0300 Not tainted (5.10.50-openpower1) [ ] MSR: 9000000000009033 CR: 44000224 XER: 20040000 [ ] CFAR: c00000000005eaa0 DAR: 0000000000000000 DSISR: 02080000 IRQMASK: 0 [ ] GPR00: c00000000005dd98 c0002000073838e0 c00000000185de00 c000200fff018960 [ ] GPR04: 00000000000000fc 0000000000000003 0000000000000000 0000000000000000 [ ] GPR08: 0000000000000000 0000000000000000 0000000000000000 9000000000001033 [ ] GPR12: 0000000031cb0000 c000000ffffe6a80 c000000000010a58 0000000000000000 [ ] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ ] GPR20: 0000000000000000 0000000000000000 0000000000000000 c00000000711e200 [ ] GPR24: 0000000000000100 c000200009501120 c00020000cee2800 00000000000003ff [ ] GPR28: c000200fff018960 0000000000000000 c000200ffcb7fd00 0000000000000000 [ ] NIP [c00000000005eac4] pnv_ioda_setup_pe_res+0x94/0x1a0 [ ] LR [c00000000005ea44] pnv_ioda_setup_pe_res+0x14/0x1a0 [ ] Call Trace: [ ] [c0002000073838e0] [c00000000005eb98] pnv_ioda_setup_pe_res+0x168/0x1a0 (unreliable) [ ] [c000200007383970] [c00000000005dd98] pnv_pci_ioda_dma_dev_setup+0x43c/0x970 [ ] [c000200007383a60] [c000000000032cdc] pcibios_bus_add_device+0x78/0x18c [ ] [c000200007383aa0] [c00000000028f2bc] pci_bus_add_device+0x28/0xbc [ ] [c000200007383b10] [c00000000028f3a0] pci_bus_add_devices+0x50/0x7c [ ] [c000200007383b50] [c00000000028f3c4] pci_bus_add_devices+0x74/0x7c [ ] [c000200007383b90] [c00000000028f3c4] pci_bus_add_devices+0x74/0x7c [ ] [c000200007383bd0] [c00000000069ad0c] pcibios_init+0xf0/0x104 [ ] [c000200007383c50] [c0000000000106d8] do_one_initcall+0x84/0x1c4 [ ] [c000200007383d20] [c0000000006910b8] kernel_init_freeable+0x264/0x268 [ ] [c000200007383dc0] [c000000000010a68] kernel_init+0x18/0x138 [ ] [c000200007383e20] [c00000000000cbfc] ret_from_kernel_thread+0x5c/0x80 [ ] Instruction dump: [ ] 7f89e840 409d000c 7fbbf840 409c000c 38210090 4848f448 809c002c e95e0120 [ ] 7ba91764 38a00003 57a7043e 38c00000 <7c8a492e> 5484043e e87e0018 4bff23bd Hitting the problem is not that easy. It was seen with a (semi-bogus) PCI device with a class code of 0. The generic PCI framework doesn't allocate resources in such a case. The patch is simply skipping resources which are still flagged with IORESOURCE_UNSET. We don't have the problem with 64-bit mem resources, as the address of the resource is checked to be within the range of the 64-bit mmio window. See pnv_ioda_reserve_dev_m64_pe() and pnv_pci_is_m64(). 
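To make the failure mode concrete, here is a small stand-alone sketch of the bad arithmetic; all numbers are made up for illustration and are not the real PHB window parameters:

#include <stdio.h>

int main(void)
{
	unsigned long window_base = 0x3fe000000000UL;	/* hypothetical MMIO window base */
	unsigned long seg_size = 0x10000000UL;		/* hypothetical segment size */
	unsigned long res_start = 0;			/* unallocated resource: start is still 0 */

	/* The derived segment number underflows and is garbage, so using it
	 * to index the PE segment map walks far outside the array. */
	unsigned long index = (res_start - window_base) / seg_size;

	printf("bogus segment index: %lu\n", index);
	return 0;
}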
Reported-by: Andrew Jeffery Fixes: 23e79425fe7c ("powerpc/powernv: Simplify pnv_ioda_setup_pe_seg()") Signed-off-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230120093215.19496-1-fbarrat@linux.ibm.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5c144c05cbfd..4f6e20a35aa1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2325,7 +2325,8 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, int index; int64_t rc; - if (!res || !res->flags || res->start > res->end) + if (!res || !res->flags || res->start > res->end || + res->flags & IORESOURCE_UNSET) return; if (res->flags & IORESOURCE_IO) { -- cgit v1.2.3 From 26d53a9c89a8486c5c637cc587e1933a786747d0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Feb 2023 21:38:56 +1000 Subject: crypto: powerpc - Use address generation helper for asm Replace open-coded toc-relative address calculation with helper macros, commit dab3b8f4fd09 ("powerpc/64: asm use consistent global variable declaration and access") made similar conversions already but missed this one. This allows data addressing model to be changed more easily. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203113858.1152093-2-npiggin@gmail.com --- arch/powerpc/crypto/crc32-vpmsum_core.S | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S index a16a717c809c..b0f87f595b26 100644 --- a/arch/powerpc/crypto/crc32-vpmsum_core.S +++ b/arch/powerpc/crypto/crc32-vpmsum_core.S @@ -113,9 +113,7 @@ FUNC_START(CRC_FUNCTION_NAME) #endif #ifdef BYTESWAP_DATA - addis r3,r2,.byteswap_constant@toc@ha - addi r3,r3,.byteswap_constant@toc@l - + LOAD_REG_ADDR(r3, .byteswap_constant) lvx byteswap,0,r3 addi r3,r3,16 #endif @@ -150,8 +148,7 @@ FUNC_START(CRC_FUNCTION_NAME) addi r7,r7,-1 mtctr r7 - addis r3,r2,.constants@toc@ha - addi r3,r3,.constants@toc@l + LOAD_REG_ADDR(r3, .constants) /* Find the start of our constants */ add r3,r3,r8 @@ -506,8 +503,7 @@ FUNC_START(CRC_FUNCTION_NAME) .Lbarrett_reduction: /* Barrett constants */ - addis r3,r2,.barrett_constants@toc@ha - addi r3,r3,.barrett_constants@toc@l + LOAD_REG_ADDR(r3, .barrett_constants) lvx const1,0,r3 lvx const2,off16,r3 @@ -610,8 +606,7 @@ FUNC_START(CRC_FUNCTION_NAME) cmpdi r5,0 beq .Lzero - addis r3,r2,.short_constants@toc@ha - addi r3,r3,.short_constants@toc@l + LOAD_REG_ADDR(r3, .short_constants) /* Calculate where in the constant table we need to start */ subfic r6,r5,256 -- cgit v1.2.3 From 58f24eea5278cb6078552e16063fdd8b0a1b9676 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Feb 2023 21:38:57 +1000 Subject: powerpc/64s: Refactor initialisation after prom Move some basic Book3S initialisation after prom to a function similar to what Book3E looks like. Book3E returns from this function at the virtual address mapping, and Book3S will do the same in a later change, so making them look similar helps with that. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203113858.1152093-3-npiggin@gmail.com --- arch/powerpc/kernel/head_64.S | 44 ++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 7558ba4eb864..5af2e473b195 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -475,8 +475,30 @@ SYM_FUNC_START_LOCAL(__mmu_off) rfid b . /* prevent speculative execution */ SYM_FUNC_END(__mmu_off) -#endif +start_initialization_book3s: + mflr r25 + + /* Setup some critical 970 SPRs before switching MMU off */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 /* 970 */ + beq 1f + cmpwi r0,0x3c /* 970FX */ + beq 1f + cmpwi r0,0x44 /* 970MP */ + beq 1f + cmpwi r0,0x45 /* 970GX */ + bne 2f +1: bl __cpu_preinit_ppc970 +2: + + /* Switch off MMU if not already off */ + bl __mmu_off + + mtlr r25 + blr +#endif /* * Here is our main kernel entry point. We support currently 2 kind of entries @@ -523,26 +545,10 @@ __start_initialization_multiplatform: #ifdef CONFIG_PPC_BOOK3E_64 bl start_initialization_book3e - b __after_prom_start #else - /* Setup some critical 970 SPRs before switching MMU off */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 /* 970 */ - beq 1f - cmpwi r0,0x3c /* 970FX */ - beq 1f - cmpwi r0,0x44 /* 970MP */ - beq 1f - cmpwi r0,0x45 /* 970GX */ - bne 2f -1: bl __cpu_preinit_ppc970 -2: - - /* Switch off MMU if not already off */ - bl __mmu_off - b __after_prom_start + bl start_initialization_book3s #endif /* CONFIG_PPC_BOOK3E_64 */ + b __after_prom_start __REF __boot_from_prom: -- cgit v1.2.3 From ffc8e90decc531a2dd59ef9e1e6f16a52057ab62 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Feb 2023 21:38:58 +1000 Subject: powerpc/64e: Simplify address calculation in secondary hold loop As the earlier comment explains, __secondary_hold_spinloop does not have to be accessed at its virtual address, slightly simplifying code. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230203113858.1152093-4-npiggin@gmail.com --- arch/powerpc/kernel/head_64.S | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 5af2e473b195..3a7266fa8a18 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -160,12 +160,8 @@ __secondary_hold: std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) sync - li r26,0 -#ifdef CONFIG_PPC_BOOK3E_64 - tovirt(r26,r26) -#endif /* All secondary cpus wait here until told to start. */ -100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(0) cmpdi 0,r12,0 beq 100b -- cgit v1.2.3 From dea18da459922d70692ae1e5736d1c760040cb07 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Dec 2022 21:59:27 +1000 Subject: powerpc/64s: Fix stress_hpt memblock alloc alignment The stress_hpt memblock allocation did not pass in an alignment, which causes a stack dump in early boot (that I missed, oops). 
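For context, memblock_phys_alloc_range() takes the requested alignment as its second argument, and memblock treats an alignment of 0 as a caller bug: it falls back to a default alignment and, roughly speaking, dump_stack()s to flag the caller, which is the stack dump seen here. A hedged sketch of the corrected call, assuming the prototype memblock_phys_alloc_range(size, align, start, end) from include/linux/memblock.h:

/* Pass a real alignment instead of 0 so memblock does not complain. */
tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
				__alignof__(struct stress_hpt_struct),
				0, MEMBLOCK_ALLOC_ANYWHERE);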
Fixes: 6b34a099faa1 ("powerpc/64s/hash: add stress_hpt kernel boot option to increase hash faults") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221216115930.2667772-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 80a148c57de8..fa1c148e1f50 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1051,7 +1051,8 @@ static void __init htab_initialize(void) static_branch_enable(&stress_hpt_key); // Too early to use nr_cpu_ids, so use NR_CPUS tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS, - 0, 0, MEMBLOCK_ALLOC_ANYWHERE); + __alignof__(struct stress_hpt_struct), + 0, MEMBLOCK_ALLOC_ANYWHERE); memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS); stress_hpt_struct = __va(tmp); -- cgit v1.2.3 From 9fa24404f5044967753a6cd3e5e36f57686bec6e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Dec 2022 21:59:28 +1000 Subject: powerpc/64: Fix task_cpu in early boot when booting non-zero cpuid powerpc/64 can boot on a non-zero SMP processor id. Initially, the boot CPU is said to be "assumed to be 0" until early_init_devtree() discovers the id from the device tree. That is not a good description because the assumption can be wrong and that has to be handled, the better description is that 0 is used as a placeholder, and things are fixed after the real id is discovered. smp_processor_id() is set to the boot cpuid, but task_cpu(current) is not, which causes the smp_processor_id() == task_cpu(current) invariant to be broken until init_idle() in sched_init(). This is quite fragile and could lead to subtle bugs in future. One bug is that validate_sp_size uses task_cpu() to get the process stack, so any stack trace from the booting CPU between early_init_devtree() and sched_init() will have problems. Early on paca_ptrs[0] will be poisoned, so that can cause machine checks dereferencing that memory in real mode. Later, validating the current stack pointer against the idle task of a different secondary will probably cause no stack trace to be printed. Fix this by setting thread_info->cpu right after smp_processor_id() is set to the boot cpuid. Signed-off-by: Nicholas Piggin [mpe: Fix SMP=n build as reported by sfr] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221216115930.2667772-3-npiggin@gmail.com --- arch/powerpc/kernel/setup_64.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a0dee7354fe6..a43865e0fb4b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -396,6 +396,11 @@ void __init early_setup(unsigned long dt_ptr) } fixup_boot_paca(paca_ptrs[boot_cpuid]); setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */ + // smp_processor_id() now reports boot_cpuid + +#ifdef CONFIG_SMP + task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current) +#endif /* * Configure exception handlers. This include setting up trampolines -- cgit v1.2.3 From dc222fa7737212fe0da513e5b8937c156d02225d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Dec 2022 21:59:29 +1000 Subject: powerpc/64: Move paca allocation to early_setup() The early paca and boot cpuid dance is complicated and currently does not quite work as expected for boot cpuid != 0 cases. 
early_init_devtree() currently allocates the paca_ptrs and boot cpuid paca, but until that returns and early_setup() calls setup_paca(), this thread is currently still executing with smp_processor_id() == 0. One problem this causes is the paca_ptrs[smp_processor_id()] pointer is poisoned, so valid_emergency_stack() (any backtrace) and any similar users will crash. Another is that the hardware id which is set here will not be returned by get_hard_smp_processor_id(smp_processor_id()), but it would work correctly for boot_cpuid == 0, which could lead to difficult to reproduce or find bugs. The hard id does not seem to be used by the rest of early_init_devtree(), it just looks like all this code might have been put here to allocate somewhere to store boot CPU hardware id while scanning the devtree. Rearrange things so the hwid is put in a global variable like boot_cpuid, and do all the paca allocation and boot paca setup in the 64-bit early_setup() after we have everything ready to go. The paca_ptrs[0] re-poisoning code in early_setup does not seem to have ever worked, because paca_ptrs[0] was never not-poisoned when boot_cpuid is not 0. Signed-off-by: Nicholas Piggin [mpe: Fix build error on 32-bit] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221216115930.2667772-4-npiggin@gmail.com --- arch/powerpc/include/asm/paca.h | 1 - arch/powerpc/include/asm/smp.h | 1 + arch/powerpc/kernel/prom.c | 12 +++--------- arch/powerpc/kernel/setup-common.c | 4 ++++ arch/powerpc/kernel/setup_64.c | 11 +++++------ 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 09f1790d0ae1..0ab3511a47d7 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -295,7 +295,6 @@ extern void free_unused_pacas(void); #else /* CONFIG_PPC64 */ -static inline void allocate_paca_ptrs(void) { } static inline void allocate_paca(int cpu) { } static inline void free_unused_pacas(void) { } diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index f63505d74932..6c6cb53d7045 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -26,6 +26,7 @@ #include extern int boot_cpuid; +extern int boot_cpu_hwid; /* PPC64 only */ extern int spinning_secondaries; extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4f1c920aa13e..f318e8e1f3fe 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -370,8 +370,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; - // Pass the boot CPU's hard CPU id back to our caller - *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); /* * PAPR defines "logical" PVR values for cpus that @@ -755,7 +755,6 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - u32 boot_cpu_hwid; phys_addr_t limit; DBG(" -> early_init_devtree(%px)\n", params); @@ -851,7 +850,7 @@ void __init early_init_devtree(void *params) /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) 
*/ - of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -868,11 +867,6 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); - // NB. paca is not installed until later in early_setup() - allocate_paca_ptrs(); - allocate_paca(boot_cpuid); - set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); - #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9b10e57040c6..e77734e5a127 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -87,6 +87,10 @@ EXPORT_SYMBOL(machine_id); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +#ifdef CONFIG_PPC64 +int boot_cpu_hwid = -1; +#endif + /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a43865e0fb4b..b2e0d3ce4261 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -385,15 +385,14 @@ void __init early_setup(unsigned long dt_ptr) /* * Do early initialization using the flattened device * tree, such as retrieving the physical memory map or - * calculating/retrieving the hash table size. + * calculating/retrieving the hash table size, discover + * boot_cpuid and boot_cpu_hwid. */ early_init_devtree(__va(dt_ptr)); - /* Now we know the logical id of our boot cpu, setup the paca. */ - if (boot_cpuid != 0) { - /* Poison paca_ptrs[0] again if it's not the boot cpu */ - memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); - } + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); fixup_boot_paca(paca_ptrs[boot_cpuid]); setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */ // smp_processor_id() now reports boot_cpuid -- cgit v1.2.3 From 1ee4e35076e36724a62ba2fa29b722fb53eb68f2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 16 Dec 2022 21:59:30 +1000 Subject: powerpc: Skip stack validation checking alternate stacks if they are not allocated Stack validation in early boot can just bail out of checking alternate stacks if they are not validated yet. Checking against a NULL stack could cause NULLish pointer values to be considered valid. 
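To see why an unallocated (NULL) alternate stack is dangerous here, consider the range check these helpers use; a self-contained sketch (the THREAD_SIZE value is chosen only for illustration):

#include <stdio.h>

#define THREAD_SIZE	16384UL	/* illustrative value */

/* With stack_page == 0 (stack not allocated yet), the usual
 * "is sp within this stack?" test accepts small, NULL-ish values. */
static int sp_on_stack(unsigned long sp, unsigned long stack_page,
		       unsigned long nbytes)
{
	return sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes;
}

int main(void)
{
	printf("%d\n", sp_on_stack(0x100, 0, 32));	/* prints 1: a bogus sp is "valid" */
	return 0;
}

This is why the helpers now return 0 up front while the alternate stack pointers are still NULL.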
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221216115930.2667772-5-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index effe9697905d..4b29ac5ddac6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2117,6 +2117,9 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); + if (!hardirq_ctx[cpu] || !softirq_ctx[cpu]) + return 0; + stack_page = (unsigned long)hardirq_ctx[cpu]; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2138,6 +2141,14 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, if (!paca_ptrs) return 0; + if (!paca_ptrs[cpu]->emergency_sp) + return 0; + +# ifdef CONFIG_PPC_BOOK3S_64 + if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp) + return 0; +#endif + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; -- cgit v1.2.3 From 2fb857bc9f9e106439017ed323f522cc785395bb Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Mon, 6 Feb 2023 13:17:57 +1100 Subject: powerpc/kcsan: Add exclusions from instrumentation Exclude various incompatible compilation units from KCSAN instrumentation. Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206021801.105268-2-rmclure@linux.ibm.com --- arch/powerpc/kernel/Makefile | 10 ++++++++++ arch/powerpc/kernel/trace/Makefile | 1 + arch/powerpc/kernel/vdso/Makefile | 1 + arch/powerpc/lib/Makefile | 2 ++ arch/powerpc/purgatory/Makefile | 1 + arch/powerpc/xmon/Makefile | 1 + 6 files changed, 16 insertions(+) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9b6146056e48..9bf2be123093 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif +KCSAN_SANITIZE_early_32.o := n +KCSAN_SANITIZE_early_64.o := n +KCSAN_SANITIZE_cputable.o := n +KCSAN_SANITIZE_btext.o := n +KCSAN_SANITIZE_paca.o := n +KCSAN_SANITIZE_setup_64.o := n + #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET # Remove stack protector to avoid triggering unneeded stack canary # checks due to randomize_kstack_offset. 
@@ -177,12 +184,15 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n +KCSAN_SANITIZE_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n GCOV_PROFILE_kprobes.o := n KCOV_INSTRUMENT_kprobes.o := n +KCSAN_SANITIZE_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n GCOV_PROFILE_kprobes-ftrace.o := n KCOV_INSTRUMENT_kprobes-ftrace.o := n +KCSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_kprobes-ftrace.o := n GCOV_PROFILE_syscall_64.o := n KCOV_INSTRUMENT_syscall_64.o := n diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index af8527538fe4..b16a9f9c0b35 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -23,4 +23,5 @@ obj-$(CONFIG_PPC32) += $(obj32-y) # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_ftrace.o := n KCOV_INSTRUMENT_ftrace.o := n +KCSAN_SANITIZE_ftrace.o := n UBSAN_SANITIZE_ftrace.o := n diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 6a977b0d8ffc..3a2f32929fcf 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -46,6 +46,7 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 4de71cbf6e8e..c4db459d304a 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,6 +16,8 @@ KASAN_SANITIZE_feature-fixups.o := n # restart_table.o contains functions called in the NMI interrupt path # which can be in real mode. Disable KASAN. KASAN_SANITIZE_restart_table.o := n +KCSAN_SANITIZE_code-patching.o := n +KCSAN_SANITIZE_feature-fixups.o := n ifdef CONFIG_KASAN CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index a81d155b89ae..6f5e2727963c 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 KASAN_SANITIZE := n +KCSAN_SANITIZE := n targets += trampoline_$(BITS).o purgatory.ro diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index eb25d7554ffd..d334de392e6c 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -5,6 +5,7 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n # Disable ftrace for the entire directory ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE) -- cgit v1.2.3 From 2a7ce82dc46c591c9244057d89a6591c9639b9b9 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Mon, 6 Feb 2023 13:17:58 +1100 Subject: powerpc/kcsan: Exclude udelay to prevent recursive instrumentation In order for KCSAN to increase its likelihood of observing a data race, it sets a watchpoint on memory accesses and stalls, allowing for detection of conflicting accesses by other kernel threads or interrupts. Stalls are implemented by injecting a call to udelay in instrumented code. To prevent recursive instrumentation, exclude udelay from being instrumented. 
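The per-function opt-out used below is the kernel's existing __no_kcsan annotation. As a hedged sketch (the helper name here is invented), excluding a function from instrumentation looks like:

/* Any routine the KCSAN runtime itself relies on for stalling must be
 * excluded, otherwise the accesses inside it would be instrumented and
 * each injected stall would re-enter KCSAN. */
static __no_kcsan void example_stall_helper(void)
{
	/* plain accesses here generate no KCSAN instrumentation */
}

The patch below applies exactly this annotation to __delay() and udelay(), complementing the per-file KCSAN_SANITIZE_foo.o := n exclusions added in the previous commit.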
Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206021801.105268-3-rmclure@linux.ibm.com --- arch/powerpc/kernel/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index d68de3618741..b894029f53db 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -356,7 +356,7 @@ void vtime_flush(struct task_struct *tsk) } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -void __delay(unsigned long loops) +void __no_kcsan __delay(unsigned long loops) { unsigned long start; @@ -377,7 +377,7 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -void udelay(unsigned long usecs) +void __no_kcsan udelay(unsigned long usecs) { __delay(tb_ticks_per_usec * usecs); } -- cgit v1.2.3 From b6e259297a6bffb882d55715284bb5219eefda42 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Mon, 6 Feb 2023 13:17:59 +1100 Subject: powerpc/kcsan: Memory barriers semantics Annotate memory barriers *mb() with calls to kcsan_mb(), signaling to compilers supporting KCSAN that the respective memory barrier has been issued. Rename memory barrier *mb() to __*mb() to opt in for asm-generic/barrier.h to generate the respective *mb() macro. Signed-off-by: Rohan McLure Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206021801.105268-4-rmclure@linux.ibm.com --- arch/powerpc/include/asm/barrier.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index e80b2c0e9315..b95b666f0374 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -35,9 +35,9 @@ * However, on CPUs that don't support lwsync, lwsync actually maps to a * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio. */ -#define mb() __asm__ __volatile__ ("sync" : : : "memory") -#define rmb() __asm__ __volatile__ ("sync" : : : "memory") -#define wmb() __asm__ __volatile__ ("sync" : : : "memory") +#define __mb() __asm__ __volatile__ ("sync" : : : "memory") +#define __rmb() __asm__ __volatile__ ("sync" : : : "memory") +#define __wmb() __asm__ __volatile__ ("sync" : : : "memory") /* The sub-arch has lwsync */ #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC) @@ -51,12 +51,12 @@ /* clang defines this macro for a builtin, which will not work with runtime patching */ #undef __lwsync #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") -#define dma_rmb() __lwsync() -#define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") +#define __dma_rmb() __lwsync() +#define __dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") #define __smp_lwsync() __lwsync() -#define __smp_mb() mb() +#define __smp_mb() __mb() #define __smp_rmb() __lwsync() #define __smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") -- cgit v1.2.3 From 4f8e09106f6e457c6e9a4ce597fa9ae2bda032c3 Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Mon, 6 Feb 2023 13:18:00 +1100 Subject: powerpc/kcsan: Prevent recursive instrumentation with IRQ save/restores Instrumented memory accesses provided by KCSAN will access core-local memories (which will save and restore IRQs) as well as restoring IRQs directly. Avoid recursive instrumentation by applying __no_kcsan annotation to IRQ restore routines. 
Signed-off-by: Rohan McLure [mpe: Resolve merge conflict with IRQ replay recursion changes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206021801.105268-5-rmclure@linux.ibm.com --- arch/powerpc/kernel/irq_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 9dc0ad3c533a..c788c55512ed 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -94,7 +94,7 @@ static inline bool irq_happened_test_and_clear(u8 irq) return false; } -static void __replay_soft_interrupts(void) +static __no_kcsan void __replay_soft_interrupts(void) { struct pt_regs regs; @@ -171,7 +171,7 @@ static void __replay_soft_interrupts(void) local_paca->irq_happened &= ~PACA_IRQ_REPLAYING; } -void replay_soft_interrupts(void) +__no_kcsan void replay_soft_interrupts(void) { irq_enter(); /* See comment in arch_local_irq_restore */ __replay_soft_interrupts(); @@ -179,7 +179,7 @@ void replay_soft_interrupts(void) } #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) -static inline void replay_soft_interrupts_irqrestore(void) +static inline __no_kcsan void replay_soft_interrupts_irqrestore(void) { unsigned long kuap_state = get_kuap(); @@ -203,7 +203,7 @@ static inline void replay_soft_interrupts_irqrestore(void) #define replay_soft_interrupts_irqrestore() __replay_soft_interrupts() #endif -notrace void arch_local_irq_restore(unsigned long mask) +notrace __no_kcsan void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; -- cgit v1.2.3 From 6f0926c00565a91f3bd7ca1aa05db307daed5e0f Mon Sep 17 00:00:00 2001 From: Rohan McLure Date: Mon, 6 Feb 2023 13:18:01 +1100 Subject: powerpc/kcsan: Add KCSAN Support Enable HAVE_ARCH_KCSAN for 64-bit Book3S, permitting use of the kernel concurrency sanitiser through the CONFIG_KCSAN_* kconfig options. KCSAN requires compiler builtins __atomic_* 64-bit values, and so only report support on 64-bit. See documentation in Documentation/dev-tools/kcsan.rst for more information. Signed-off-by: Rohan McLure [mpe: Limit to Book3S to avoid build failure on Book3E] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230206021801.105268-6-rmclure@linux.ibm.com --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 24e023da4d2e..1f3306e9d622 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -198,6 +198,7 @@ config PPC select HAVE_ARCH_KASAN if PPC_RADIX_MMU select HAVE_ARCH_KASAN if PPC_BOOK3E_64 select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN + select HAVE_ARCH_KCSAN if PPC_BOOK3S_64 select HAVE_ARCH_KFENCE if ARCH_SUPPORTS_DEBUG_PAGEALLOC select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_KGDB -- cgit v1.2.3 From 5705c6d97efc4aa9478fe2887fd911f60ddf17e5 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 3 Jan 2023 17:51:03 +0000 Subject: powerpc/ps3: Change updateboltedpp() panic to info Commit fdacae8a8402 ("powerpc: Activate CONFIG_STRICT_KERNEL_RWX by default") causes ps3_hpte_updateboltedpp() to be called. The correct fix would be to implement updateboltedpp() for PS3, but it's not clear if that's possible. As a stop-gap, change the panic statment in ps3_hpte_updateboltedpp() to a pr_info statement so that bootup can continue. 
Signed-off-by: Geoff Levand [mpe: Flesh out change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2df879d982809c05b0dfade57942fe03dbe9e7de.1672767868.git.geoff@infradead.org --- arch/powerpc/platforms/ps3/htab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index c27e6cf85272..9de62bd52650 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -146,7 +146,7 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, int psize, int ssize) { - panic("ps3_hpte_updateboltedpp() not implemented"); + pr_info("ps3_hpte_updateboltedpp() not implemented"); } static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, -- cgit v1.2.3 From 544f823ec7a34332550f22735959d3e1ffcf4684 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 3 Jan 2023 17:51:03 +0000 Subject: powerpc/ps3: Refresh ps3_defconfig Refresh ps3_defconfig for v6.2. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/99e87549b17feca3494e9df6f4def04a9ec7c042.1672767868.git.geoff@infradead.org --- arch/powerpc/configs/ps3_defconfig | 39 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 0a1b42c4f26a..52a8c5450ecb 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -1,8 +1,3 @@ -CONFIG_PPC64=y -CONFIG_CELL_CPU=y -CONFIG_ALTIVEC=y -CONFIG_SMP=y -CONFIG_NR_CPUS=2 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_HIGH_RES_TIMERS=y @@ -10,11 +5,12 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_PERF_EVENTS is not set -# CONFIG_COMPAT_BRK is not set -CONFIG_SLAB=y CONFIG_PROFILING=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y +CONFIG_PPC64=y +CONFIG_CELL_CPU=y +CONFIG_ALTIVEC=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2 # CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set @@ -27,17 +23,20 @@ CONFIG_PS3_FLASH=y CONFIG_PS3_VRAM=m CONFIG_PS3_LPM=m # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_MISC=y CONFIG_KEXEC=y CONFIG_PPC_4K_PAGES=y -# CONFIG_SPARSEMEM_VMEMMAP is not set -# CONFIG_COMPACTION is not set CONFIG_SCHED_SMT=y CONFIG_PM=y CONFIG_PM_DEBUG=y # CONFIG_SECCOMP is not set -# CONFIG_PCI is not set +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_SLAB=y +# CONFIG_COMPAT_BRK is not set +# CONFIG_SPARSEMEM_VMEMMAP is not set +# CONFIG_COMPACTION is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -87,7 +86,6 @@ CONFIG_USB_USBNET=m # CONFIG_USB_NET_NET1080 is not set # CONFIG_USB_NET_CDC_SUBSET is not set # CONFIG_USB_NET_ZAURUS is not set -CONFIG_INPUT_FF_MEMLESS=m CONFIG_INPUT_JOYDEV=m CONFIG_INPUT_EVDEV=m # CONFIG_INPUT_KEYBOARD is not set @@ -110,13 +108,10 @@ CONFIG_SND=m # CONFIG_SND_DRIVERS is not set CONFIG_SND_USB_AUDIO=m CONFIG_HIDRAW=y -CONFIG_HID_APPLE=m CONFIG_HID_BELKIN=m CONFIG_HID_CHERRY=m CONFIG_HID_EZKEY=m CONFIG_HID_TWINHAN=m -CONFIG_HID_LOGITECH=m -CONFIG_HID_LOGITECH_DJ=m CONFIG_HID_MICROSOFT=m CONFIG_HID_SUNPLUS=m CONFIG_HID_SMARTJOYPLUS=m @@ -151,8 +146,12 @@ CONFIG_CIFS=m CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y +CONFIG_CRYPTO_PCBC=m 
+CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_LZO=m CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y +CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y @@ -163,7 +162,3 @@ CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FTRACE is not set -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_LZO=m -CONFIG_PRINTK_TIME=y -- cgit v1.2.3 From f74dcbfd27c647af9b7b83f3711c63712c677abd Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:36 +1100 Subject: powerpc/pseries: Fix handling of PLPKS object flushing timeout plpks_confirm_object_flushed() uses the H_PKS_CONFIRM_OBJECT_FLUSHED hcall to check whether changes to an object in the Platform KeyStore have been flushed to non-volatile storage. The hcall returns two output values, the return code and the flush status. plpks_confirm_object_flushed() polls the hcall until either the flush status has updated, the return code is an error, or a timeout has been exceeded. While we're still polling, the hcall is returning H_SUCCESS (0) as the return code. In the timeout case, this means that upon exiting the polling loop, rc is 0, and therefore 0 is returned to the user. Handle the timeout case separately and return ETIMEDOUT if triggered. Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Reported-by: Benjamin Gray Signed-off-by: Andrew Donnellan Tested-by: Russell Currey Reviewed-by: Russell Currey Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-2-ajd@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 4edd1585e245..9e85b6d85b0b 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -248,6 +248,7 @@ static int plpks_confirm_object_flushed(struct label *label, struct plpks_auth *auth) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + bool timed_out = true; u64 timeout = 0; u8 status; int rc; @@ -259,22 +260,26 @@ static int plpks_confirm_object_flushed(struct label *label, status = retbuf[0]; if (rc) { + timed_out = false; if (rc == H_NOT_FOUND && status == 1) rc = 0; break; } - if (!rc && status == 1) + if (!rc && status == 1) { + timed_out = false; break; + } usleep_range(PKS_FLUSH_SLEEP, PKS_FLUSH_SLEEP + PKS_FLUSH_SLEEP_RANGE); timeout = timeout + PKS_FLUSH_SLEEP; } while (timeout < PKS_FLUSH_MAX_TIMEOUT); - rc = pseries_status_to_err(rc); + if (timed_out) + return -ETIMEDOUT; - return rc; + return pseries_status_to_err(rc); } int plpks_write_var(struct plpks_var var) -- cgit v1.2.3 From fcf63d6b8ab9b12c2ce1b4bde12a3c391029c998 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:37 +1100 Subject: powerpc/pseries: Fix alignment of PLPKS structures and buffers A number of structures and buffers passed to PKS hcalls have alignment requirements, which could on occasion cause problems: - Authorisation structures must be 16-byte aligned and must not cross a page boundary - Label structures must not cross page boundaries - Password output buffers must not cross page boundaries To ensure correct alignment, we adjust the allocation size of each of these structures/buffers to be the closest power of 2 that is at least the size of the structure/buffer (since kmalloc() guarantees that an allocation of a power 
of 2 size will be aligned to at least that size). Reported-by: Benjamin Gray Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Andrew Donnellan Reviewed-by: Russell Currey Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-3-ajd@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 9e85b6d85b0b..a01cf2ff140a 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -126,7 +126,8 @@ static int plpks_gen_password(void) u8 *password, consumer = PKS_OS_OWNER; int rc; - password = kzalloc(maxpwsize, GFP_KERNEL); + // The password must not cross a page boundary, so we align to the next power of 2 + password = kzalloc(roundup_pow_of_two(maxpwsize), GFP_KERNEL); if (!password) return -ENOMEM; @@ -162,7 +163,9 @@ static struct plpks_auth *construct_auth(u8 consumer) if (consumer > PKS_OS_OWNER) return ERR_PTR(-EINVAL); - auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL); + // The auth structure must not cross a page boundary and must be + // 16 byte aligned. We align to the next largest power of 2 + auth = kzalloc(roundup_pow_of_two(struct_size(auth, password, maxpwsize)), GFP_KERNEL); if (!auth) return ERR_PTR(-ENOMEM); @@ -196,7 +199,8 @@ static struct label *construct_label(char *component, u8 varos, u8 *name, if (component && slen > sizeof(label->attr.prefix)) return ERR_PTR(-EINVAL); - label = kzalloc(sizeof(*label), GFP_KERNEL); + // The label structure must not cross a page boundary, so we align to the next power of 2 + label = kzalloc(roundup_pow_of_two(sizeof(*label)), GFP_KERNEL); if (!label) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From c9fd2952754a03b2c14433c0318f4b46e9c0f2ef Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:38 +1100 Subject: powerpc/secvar: Fix incorrect return in secvar_sysfs_load() secvar_ops->get_next() returns -ENOENT when there are no more variables to return, which is expected behaviour. Fix this by returning 0 if get_next() returns -ENOENT. This fixes an issue introduced in commit bd5d9c743d38 ("powerpc: expose secure variables to userspace via sysfs"), but the return code of secvar_sysfs_load() was never checked so this issue never mattered. Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-4-ajd@linux.ibm.com --- arch/powerpc/kernel/secvar-sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 1ee4640a2641..7fa5f8ed9542 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -179,8 +179,10 @@ static int secvar_sysfs_load(void) rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE); if (rc) { if (rc != -ENOENT) - pr_err("error getting secvar from firmware %d\n", - rc); + pr_err("error getting secvar from firmware %d\n", rc); + else + rc = 0; + break; } -- cgit v1.2.3 From 53cea34b0a0a03568e189f8dfe2eb06f938986c8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 10 Feb 2023 19:03:39 +1100 Subject: powerpc/secvar: Use u64 in secvar_operations There's no reason for secvar_operations to use uint64_t vs the more common kernel type u64. 
The types are compatible, but they require different printk format strings which can lead to confusion. Change all the secvar related routines to use u64. Reviewed-by: Russell Currey Reviewed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-5-ajd@linux.ibm.com --- arch/powerpc/include/asm/secvar.h | 9 +++------ arch/powerpc/kernel/secvar-sysfs.c | 8 ++++---- arch/powerpc/platforms/powernv/opal-secvar.c | 9 +++------ security/integrity/platform_certs/load_powerpc.c | 4 ++-- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h index 4cc35b58b986..07ba36f868a7 100644 --- a/arch/powerpc/include/asm/secvar.h +++ b/arch/powerpc/include/asm/secvar.h @@ -14,12 +14,9 @@ extern const struct secvar_operations *secvar_ops; struct secvar_operations { - int (*get)(const char *key, uint64_t key_len, u8 *data, - uint64_t *data_size); - int (*get_next)(const char *key, uint64_t *key_len, - uint64_t keybufsize); - int (*set)(const char *key, uint64_t key_len, u8 *data, - uint64_t data_size); + int (*get)(const char *key, u64 key_len, u8 *data, u64 *data_size); + int (*get_next)(const char *key, u64 *key_len, u64 keybufsize); + int (*set)(const char *key, u64 key_len, u8 *data, u64 data_size); }; #ifdef CONFIG_PPC_SECURE_BOOT diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 7fa5f8ed9542..702044edf14d 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -47,7 +47,7 @@ out: static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - uint64_t dsize; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); @@ -64,8 +64,8 @@ static ssize_t data_read(struct file *filep, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - uint64_t dsize; char *data; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); @@ -166,9 +166,9 @@ out: static int secvar_sysfs_load(void) { - char *name; - uint64_t namesize = 0; struct kobject *kobj; + u64 namesize = 0; + char *name; int rc; name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL); diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c index 14133e120bdd..ef89861569e0 100644 --- a/arch/powerpc/platforms/powernv/opal-secvar.c +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -54,8 +54,7 @@ static int opal_status_to_err(int rc) return err; } -static int opal_get_variable(const char *key, uint64_t ksize, - u8 *data, uint64_t *dsize) +static int opal_get_variable(const char *key, u64 ksize, u8 *data, u64 *dsize) { int rc; @@ -71,8 +70,7 @@ static int opal_get_variable(const char *key, uint64_t ksize, return opal_status_to_err(rc); } -static int opal_get_next_variable(const char *key, uint64_t *keylen, - uint64_t keybufsize) +static int opal_get_next_variable(const char *key, u64 *keylen, u64 keybufsize) { int rc; @@ -88,8 +86,7 @@ static int opal_get_next_variable(const char *key, uint64_t *keylen, return opal_status_to_err(rc); } -static int opal_set_variable(const char *key, uint64_t ksize, u8 *data, - uint64_t dsize) +static int opal_set_variable(const char *key, u64 ksize, u8 *data, u64 dsize) { int rc; diff --git a/security/integrity/platform_certs/load_powerpc.c b/security/integrity/platform_certs/load_powerpc.c index a2900cb85357..1e4f80a4e71c 
100644 --- a/security/integrity/platform_certs/load_powerpc.c +++ b/security/integrity/platform_certs/load_powerpc.c @@ -18,7 +18,7 @@ /* * Get a certificate list blob from the named secure variable. */ -static __init void *get_cert_list(u8 *key, unsigned long keylen, uint64_t *size) +static __init void *get_cert_list(u8 *key, unsigned long keylen, u64 *size) { int rc; void *db; @@ -51,7 +51,7 @@ static __init void *get_cert_list(u8 *key, unsigned long keylen, uint64_t *size) static int __init load_powerpc_certs(void) { void *db = NULL, *dbx = NULL; - uint64_t dbsize = 0, dbxsize = 0; + u64 dbsize = 0, dbxsize = 0; int rc = 0; struct device_node *node; -- cgit v1.2.3 From 26149b02021158248b13e323f06372d87f076883 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:40 +1100 Subject: powerpc/secvar: Warn and error if multiple secvar ops are set The secvar code only supports one consumer at a time. Multiple consumers aren't possible at this point in time, but we'd want it to be obvious if it ever could happen. Signed-off-by: Russell Currey Co-developed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-6-ajd@linux.ibm.com --- arch/powerpc/include/asm/secvar.h | 4 ++-- arch/powerpc/kernel/secvar-ops.c | 10 ++++++++-- arch/powerpc/platforms/powernv/opal-secvar.c | 4 +--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h index 07ba36f868a7..a2b5f2203dc5 100644 --- a/arch/powerpc/include/asm/secvar.h +++ b/arch/powerpc/include/asm/secvar.h @@ -21,11 +21,11 @@ struct secvar_operations { #ifdef CONFIG_PPC_SECURE_BOOT -extern void set_secvar_ops(const struct secvar_operations *ops); +int set_secvar_ops(const struct secvar_operations *ops); #else -static inline void set_secvar_ops(const struct secvar_operations *ops) { } +static inline int set_secvar_ops(const struct secvar_operations *ops) { return 0; } #endif diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c index 6a29777d6a2d..19172a2804f0 100644 --- a/arch/powerpc/kernel/secvar-ops.c +++ b/arch/powerpc/kernel/secvar-ops.c @@ -8,10 +8,16 @@ #include #include +#include -const struct secvar_operations *secvar_ops __ro_after_init; +const struct secvar_operations *secvar_ops __ro_after_init = NULL; -void set_secvar_ops(const struct secvar_operations *ops) +int set_secvar_ops(const struct secvar_operations *ops) { + if (WARN_ON_ONCE(secvar_ops)) + return -EBUSY; + secvar_ops = ops; + + return 0; } diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c index ef89861569e0..4c0a3b030fe0 100644 --- a/arch/powerpc/platforms/powernv/opal-secvar.c +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -113,9 +113,7 @@ static int opal_secvar_probe(struct platform_device *pdev) return -ENODEV; } - set_secvar_ops(&opal_secvar_ops); - - return 0; + return set_secvar_ops(&opal_secvar_ops); } static const struct of_device_id opal_secvar_match[] = { -- cgit v1.2.3 From 16943a2faf94ef671e60c7577511c0d119fbdfc8 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:41 +1100 Subject: powerpc/secvar: Use sysfs_emit() instead of sprintf() The secvar format string and object size sysfs files are both ASCII text, and should use sysfs_emit(). No functional change. 
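For reference, the general show-callback pattern being adopted is roughly the following (the attribute and value are invented for illustration):

static ssize_t example_show(struct kobject *kobj, struct kobj_attribute *attr,
			    char *buf)
{
	/* sysfs_emit() knows buf is a full PAGE_SIZE buffer and guards
	 * against overruns, unlike a bare sprintf(). */
	return sysfs_emit(buf, "%d\n", 42);
}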
Suggested-by: Greg Kroah-Hartman Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-7-ajd@linux.ibm.com --- arch/powerpc/kernel/secvar-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 702044edf14d..b786d1005027 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -35,7 +35,7 @@ static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, if (rc) goto out; - rc = sprintf(buf, "%s\n", format); + rc = sysfs_emit(buf, "%s\n", format); out: of_node_put(node); @@ -57,7 +57,7 @@ static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, return rc; } - return sprintf(buf, "%llu\n", dsize); + return sysfs_emit(buf, "%llu\n", dsize); } static ssize_t data_read(struct file *filep, struct kobject *kobj, -- cgit v1.2.3 From ec2f40bd004b4b9142469282d4a6ce9afa22f9c0 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:42 +1100 Subject: powerpc/secvar: Handle format string in the consumer The code that handles the format string in secvar-sysfs.c is entirely OPAL specific, so create a new "format" op in secvar_operations to make the secvar code more generic. No functional change. Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-8-ajd@linux.ibm.com --- arch/powerpc/include/asm/secvar.h | 1 + arch/powerpc/kernel/secvar-sysfs.c | 27 +++++++++------------------ arch/powerpc/platforms/powernv/opal-secvar.c | 25 +++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h index a2b5f2203dc5..1a2c696a48ad 100644 --- a/arch/powerpc/include/asm/secvar.h +++ b/arch/powerpc/include/asm/secvar.h @@ -17,6 +17,7 @@ struct secvar_operations { int (*get)(const char *key, u64 key_len, u8 *data, u64 *data_size); int (*get_next)(const char *key, u64 *key_len, u64 keybufsize); int (*set)(const char *key, u64 key_len, u8 *data, u64 data_size); + ssize_t (*format)(char *buf, size_t bufsize); }; #ifdef CONFIG_PPC_SECURE_BOOT diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index b786d1005027..e4661559c855 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -21,26 +21,17 @@ static struct kset *secvar_kset; static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - ssize_t rc = 0; - struct device_node *node; - const char *format; - - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } + char tmp[32]; + ssize_t len = secvar_ops->format(tmp, sizeof(tmp)); - rc = of_property_read_string(node, "format", &format); - if (rc) - goto out; + if (len > 0) + return sysfs_emit(buf, "%s\n", tmp); + else if (len < 0) + pr_err("Error %zd reading format string\n", len); + else + pr_err("Got empty format string from backend\n"); - rc = sysfs_emit(buf, "%s\n", format); - -out: - of_node_put(node); - - return rc; + return -EIO; } diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c index 4c0a3b030fe0..e33bb703ecbc 100644 --- 
a/arch/powerpc/platforms/powernv/opal-secvar.c +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -98,10 +98,35 @@ static int opal_set_variable(const char *key, u64 ksize, u8 *data, u64 dsize) return opal_status_to_err(rc); } +static ssize_t opal_secvar_format(char *buf, size_t bufsize) +{ + ssize_t rc = 0; + struct device_node *node; + const char *format; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } + + rc = of_property_read_string(node, "format", &format); + if (rc) + goto out; + + rc = snprintf(buf, bufsize, "%s", format); + +out: + of_node_put(node); + + return rc; +} + static const struct secvar_operations opal_secvar_ops = { .get = opal_get_variable, .get_next = opal_get_next_variable, .set = opal_set_variable, + .format = opal_secvar_format, }; static int opal_secvar_probe(struct platform_device *pdev) -- cgit v1.2.3 From e02407944052554c1685e11e56175147d1ac56b6 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:43 +1100 Subject: powerpc/secvar: Handle max object size in the consumer Currently the max object size is handled in the core secvar code with an entirely OPAL-specific implementation, so create a new max_size() op and move the existing implementation into the powernv platform. Should be no functional change. Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-9-ajd@linux.ibm.com --- arch/powerpc/include/asm/secvar.h | 1 + arch/powerpc/kernel/secvar-sysfs.c | 17 +++-------------- arch/powerpc/platforms/powernv/opal-secvar.c | 22 ++++++++++++++++++++++ 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h index 1a2c696a48ad..bf396215903d 100644 --- a/arch/powerpc/include/asm/secvar.h +++ b/arch/powerpc/include/asm/secvar.h @@ -18,6 +18,7 @@ struct secvar_operations { int (*get_next)(const char *key, u64 *key_len, u64 keybufsize); int (*set)(const char *key, u64 key_len, u8 *data, u64 data_size); ssize_t (*format)(char *buf, size_t bufsize); + int (*max_size)(u64 *max_size); }; #ifdef CONFIG_PPC_SECURE_BOOT diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index e4661559c855..0966806f28c7 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -132,27 +132,16 @@ static struct kobj_type secvar_ktype = { static int update_kobj_size(void) { - struct device_node *node; u64 varsize; - int rc = 0; + int rc = secvar_ops->max_size(&varsize); - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } - - rc = of_property_read_u64(node, "max-var-size", &varsize); if (rc) - goto out; + return rc; data_attr.size = varsize; update_attr.size = varsize; -out: - of_node_put(node); - - return rc; + return 0; } static int secvar_sysfs_load(void) diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c index e33bb703ecbc..a8436bf35e2f 100644 --- a/arch/powerpc/platforms/powernv/opal-secvar.c +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -122,11 +122,33 @@ out: return rc; } +static int opal_secvar_max_size(u64 *max_size) +{ + int rc; + struct device_node *node; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!node) + return -ENODEV; + + if 
(!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } + + rc = of_property_read_u64(node, "max-var-size", max_size); + +out: + of_node_put(node); + return rc; +} + static const struct secvar_operations opal_secvar_ops = { .get = opal_get_variable, .get_next = opal_get_next_variable, .set = opal_set_variable, .format = opal_secvar_format, + .max_size = opal_secvar_max_size, }; static int opal_secvar_probe(struct platform_device *pdev) -- cgit v1.2.3 From caefd3b77450e330845755ea57add2315fd5e4d9 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:44 +1100 Subject: powerpc/secvar: Clean up init error messages Remove unnecessary prefixes from error messages in secvar_sysfs_init() (the file defines pr_fmt, so putting "secvar:" in every message is unnecessary). Make capitalisation and punctuation more consistent. Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-10-ajd@linux.ibm.com --- arch/powerpc/kernel/secvar-sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 0966806f28c7..8f3deff94009 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -196,13 +196,13 @@ static int secvar_sysfs_init(void) int rc; if (!secvar_ops) { - pr_warn("secvar: failed to retrieve secvar operations.\n"); + pr_warn("Failed to retrieve secvar operations\n"); return -ENODEV; } secvar_kobj = kobject_create_and_add("secvar", firmware_kobj); if (!secvar_kobj) { - pr_err("secvar: Failed to create firmware kobj\n"); + pr_err("Failed to create firmware kobj\n"); return -ENOMEM; } @@ -214,7 +214,7 @@ static int secvar_sysfs_init(void) secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); if (!secvar_kset) { - pr_err("secvar: sysfs kobject registration failed.\n"); + pr_err("sysfs kobject registration failed\n"); kobject_put(secvar_kobj); return -ENOMEM; } -- cgit v1.2.3 From 86b6c0ae2caee9cadee1256d31b204ea54cb55c0 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:45 +1100 Subject: powerpc/secvar: Extend sysfs to include config vars The forthcoming pseries consumer of the secvar API wants to expose a number of config variables. Allowing secvar implementations to provide their own sysfs attributes makes it easy for consumers to expose what they need to. This is not being used by the OPAL secvar implementation at present, and the config directory will not be created if no attributes are set. 
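A hypothetical backend (not the OPAL driver, and not necessarily how the later pseries consumer will look) could populate the new field along these lines:

static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr,
			    char *buf)
{
	return sysfs_emit(buf, "%d\n", 1);	/* made-up config value */
}
static struct kobj_attribute attr_version = __ATTR_RO(version);

static const struct attribute *example_config_attrs[] = {
	&attr_version.attr,
	NULL,
};

static const struct secvar_operations example_secvar_ops = {
	/* .get, .set, .get_next, .format, .max_size as usual */
	.config_attrs = example_config_attrs,
};

With the sysfs code below, each such attribute would then appear under /sys/firmware/secvar/config/.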
Signed-off-by: Russell Currey Co-developed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-11-ajd@linux.ibm.com --- arch/powerpc/include/asm/secvar.h | 2 ++ arch/powerpc/kernel/secvar-sysfs.c | 33 ++++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h index bf396215903d..011a53a8076c 100644 --- a/arch/powerpc/include/asm/secvar.h +++ b/arch/powerpc/include/asm/secvar.h @@ -10,6 +10,7 @@ #include #include +#include extern const struct secvar_operations *secvar_ops; @@ -19,6 +20,7 @@ struct secvar_operations { int (*set)(const char *key, u64 key_len, u8 *data, u64 data_size); ssize_t (*format)(char *buf, size_t bufsize); int (*max_size)(u64 *max_size); + const struct attribute **config_attrs; }; #ifdef CONFIG_PPC_SECURE_BOOT diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 8f3deff94009..7df32be86507 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -144,6 +144,19 @@ static int update_kobj_size(void) return 0; } +static int secvar_sysfs_config(struct kobject *kobj) +{ + struct attribute_group config_group = { + .name = "config", + .attrs = (struct attribute **)secvar_ops->config_attrs, + }; + + if (secvar_ops->config_attrs) + return sysfs_create_group(kobj, &config_group); + + return 0; +} + static int secvar_sysfs_load(void) { struct kobject *kobj; @@ -208,26 +221,36 @@ static int secvar_sysfs_init(void) rc = sysfs_create_file(secvar_kobj, &format_attr.attr); if (rc) { - kobject_put(secvar_kobj); - return -ENOMEM; + pr_err("Failed to create format object\n"); + rc = -ENOMEM; + goto err; } secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); if (!secvar_kset) { pr_err("sysfs kobject registration failed\n"); - kobject_put(secvar_kobj); - return -ENOMEM; + rc = -ENOMEM; + goto err; } rc = update_kobj_size(); if (rc) { pr_err("Cannot read the size of the attribute\n"); - return rc; + goto err; + } + + rc = secvar_sysfs_config(secvar_kobj); + if (rc) { + pr_err("Failed to create config directory\n"); + goto err; } secvar_sysfs_load(); return 0; +err: + kobject_put(secvar_kobj); + return rc; } late_initcall(secvar_sysfs_init); -- cgit v1.2.3 From 50a466bf3e6f6f177dc0aeefa46a2f8927075a1d Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:46 +1100 Subject: powerpc/secvar: Allow backend to populate static list of variable names Currently, the list of variables is populated by calling secvar_ops->get_next() repeatedly, which is explicitly modelled on the OPAL API (including the keylen parameter). For the upcoming PLPKS backend, we have a static list of variable names. It is messy to fit that into get_next(), so instead, let the backend put a NULL-terminated array of variable names into secvar_ops->var_names, which will be used if get_next() is undefined. 
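As a sketch of what such a backend might provide (the variable names are illustrative only):

static const char * const example_var_names[] = {
	"PK",
	"KEK",
	"db",
	"dbx",
	NULL,		/* terminator, required by the static loader */
};

static const struct secvar_operations example_static_ops = {
	/* no .get_next -- the fixed list above is used instead */
	.var_names = example_var_names,
};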
Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-12-ajd@linux.ibm.com --- arch/powerpc/include/asm/secvar.h | 4 +++ arch/powerpc/kernel/secvar-sysfs.c | 69 ++++++++++++++++++++++++++------------ 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h index 011a53a8076c..4828e0ab7e3c 100644 --- a/arch/powerpc/include/asm/secvar.h +++ b/arch/powerpc/include/asm/secvar.h @@ -21,6 +21,10 @@ struct secvar_operations { ssize_t (*format)(char *buf, size_t bufsize); int (*max_size)(u64 *max_size); const struct attribute **config_attrs; + + // NULL-terminated array of fixed variable names + // Only used if get_next() isn't provided + const char * const *var_names; }; #ifdef CONFIG_PPC_SECURE_BOOT diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 7df32be86507..bfb19f22c6ba 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -157,9 +157,31 @@ static int secvar_sysfs_config(struct kobject *kobj) return 0; } -static int secvar_sysfs_load(void) +static int add_var(const char *name) { struct kobject *kobj; + int rc; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return -ENOMEM; + + kobject_init(kobj, &secvar_ktype); + + rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); + if (rc) { + pr_warn("kobject_add error %d for attribute: %s\n", rc, + name); + kobject_put(kobj); + return rc; + } + + kobject_uevent(kobj, KOBJ_ADD); + return 0; +} + +static int secvar_sysfs_load(void) +{ u64 namesize = 0; char *name; int rc; @@ -179,31 +201,28 @@ static int secvar_sysfs_load(void) break; } - kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); - if (!kobj) { - rc = -ENOMEM; - break; - } - - kobject_init(kobj, &secvar_ktype); - - rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); - if (rc) { - pr_warn("kobject_add error %d for attribute: %s\n", rc, - name); - kobject_put(kobj); - kobj = NULL; - } - - if (kobj) - kobject_uevent(kobj, KOBJ_ADD); - + rc = add_var(name); } while (!rc); kfree(name); return rc; } +static int secvar_sysfs_load_static(void) +{ + const char * const *name_ptr = secvar_ops->var_names; + int rc; + + while (*name_ptr) { + rc = add_var(*name_ptr); + if (rc) + return rc; + name_ptr++; + } + + return 0; +} + static int secvar_sysfs_init(void) { int rc; @@ -245,7 +264,15 @@ static int secvar_sysfs_init(void) goto err; } - secvar_sysfs_load(); + if (secvar_ops->get_next) + rc = secvar_sysfs_load(); + else + rc = secvar_sysfs_load_static(); + + if (rc) { + pr_err("Failed to create variable attributes\n"); + goto err; + } return 0; err: -- cgit v1.2.3 From 6d64c497a31bd888110785def44529ebb96bce49 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:47 +1100 Subject: powerpc/secvar: Warn when PAGE_SIZE is smaller than max object size Due to sysfs constraints, when writing to a variable, we can only handle writes of up to PAGE_SIZE. It's possible that the maximum object size is larger than PAGE_SIZE, in which case, print a warning on boot so that the user is aware. 
Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-13-ajd@linux.ibm.com --- arch/powerpc/kernel/secvar-sysfs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index bfb19f22c6ba..6ba23b2bb9da 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -225,6 +225,7 @@ static int secvar_sysfs_load_static(void) static int secvar_sysfs_init(void) { + u64 max_size; int rc; if (!secvar_ops) { @@ -274,6 +275,14 @@ static int secvar_sysfs_init(void) goto err; } + // Due to sysfs limitations, we will only ever get a write buffer of + // up to 1 page in size. Print a warning if this is potentially going + // to cause problems, so that the user is aware. + secvar_ops->max_size(&max_size); + if (max_size > PAGE_SIZE) + pr_warn_ratelimited("PAGE_SIZE (%lu) is smaller than maximum object size (%llu), writes are limited to PAGE_SIZE\n", + PAGE_SIZE, max_size); + return 0; err: kobject_put(secvar_kobj); -- cgit v1.2.3 From c96db155ebc6be868d5dde1b5caf6879c181cda4 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:48 +1100 Subject: powerpc/secvar: Don't print error on ENOENT when reading variables If attempting to read the size or data attributes of a non-existent variable (which will be possible after a later patch to expose the PLPKS via the secvar interface), don't spam the kernel log with error messages. Only print errors for return codes that aren't ENOENT. Reported-by: Sudhakar Kuppusamy Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-14-ajd@linux.ibm.com --- arch/powerpc/kernel/secvar-sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 6ba23b2bb9da..eb3c053f323f 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -43,8 +43,8 @@ static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error retrieving %s variable size %d\n", kobj->name, - rc); + if (rc != -ENOENT) + pr_err("Error retrieving %s variable size %d\n", kobj->name, rc); return rc; } @@ -61,7 +61,8 @@ static ssize_t data_read(struct file *filep, struct kobject *kobj, rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error getting %s variable size %d\n", kobj->name, rc); + if (rc != -ENOENT) + pr_err("Error getting %s variable size %d\n", kobj->name, rc); return rc; } pr_debug("dsize is %llu\n", dsize); -- cgit v1.2.3 From 90b74e305d6b5a444b1283dd7ad1caf6acaa0340 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:49 +1100 Subject: powerpc/pseries: Move plpks.h to include directory Move plpks.h from platforms/pseries/ to include/asm/. This is necessary for later patches to make use of the PLPKS from code in other subsystems. 
Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-15-ajd@linux.ibm.com --- arch/powerpc/include/asm/plpks.h | 75 ++++++++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/plpks.c | 3 +- arch/powerpc/platforms/pseries/plpks.h | 71 -------------------------------- 3 files changed, 76 insertions(+), 73 deletions(-) create mode 100644 arch/powerpc/include/asm/plpks.h delete mode 100644 arch/powerpc/platforms/pseries/plpks.h diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h new file mode 100644 index 000000000000..8295502ee93b --- /dev/null +++ b/arch/powerpc/include/asm/plpks.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 IBM Corporation + * Author: Nayna Jain + * + * Platform keystore for pseries LPAR(PLPKS). + */ + +#ifndef _ASM_POWERPC_PLPKS_H +#define _ASM_POWERPC_PLPKS_H + +#ifdef CONFIG_PSERIES_PLPKS + +#include +#include + +#define OSSECBOOTAUDIT 0x40000000 +#define OSSECBOOTENFORCE 0x20000000 +#define WORLDREADABLE 0x08000000 +#define SIGNEDUPDATE 0x01000000 + +#define PLPKS_VAR_LINUX 0x02 +#define PLPKS_VAR_COMMON 0x04 + +struct plpks_var { + char *component; + u8 *name; + u8 *data; + u32 policy; + u16 namelen; + u16 datalen; + u8 os; +}; + +struct plpks_var_name { + u8 *name; + u16 namelen; +}; + +struct plpks_var_name_list { + u32 varcount; + struct plpks_var_name varlist[]; +}; + +/** + * Writes the specified var and its data to PKS. + * Any caller of PKS driver should present a valid component type for + * their variable. + */ +int plpks_write_var(struct plpks_var var); + +/** + * Removes the specified var and its data from PKS. + */ +int plpks_remove_var(char *component, u8 varos, + struct plpks_var_name vname); + +/** + * Returns the data for the specified os variable. + */ +int plpks_read_os_var(struct plpks_var *var); + +/** + * Returns the data for the specified firmware variable. + */ +int plpks_read_fw_var(struct plpks_var *var); + +/** + * Returns the data for the specified bootloader variable. + */ +int plpks_read_bootloader_var(struct plpks_var *var); + +#endif // CONFIG_PSERIES_PLPKS + +#endif // _ASM_POWERPC_PLPKS_H diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index a01cf2ff140a..13e6daadb179 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -18,8 +18,7 @@ #include #include #include - -#include "plpks.h" +#include #define PKS_FW_OWNER 0x1 #define PKS_BOOTLOADER_OWNER 0x2 diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h deleted file mode 100644 index 275ccd86bfb5..000000000000 --- a/arch/powerpc/platforms/pseries/plpks.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2022 IBM Corporation - * Author: Nayna Jain - * - * Platform keystore for pseries LPAR(PLPKS). 
- */ - -#ifndef _PSERIES_PLPKS_H -#define _PSERIES_PLPKS_H - -#include -#include - -#define OSSECBOOTAUDIT 0x40000000 -#define OSSECBOOTENFORCE 0x20000000 -#define WORLDREADABLE 0x08000000 -#define SIGNEDUPDATE 0x01000000 - -#define PLPKS_VAR_LINUX 0x02 -#define PLPKS_VAR_COMMON 0x04 - -struct plpks_var { - char *component; - u8 *name; - u8 *data; - u32 policy; - u16 namelen; - u16 datalen; - u8 os; -}; - -struct plpks_var_name { - u8 *name; - u16 namelen; -}; - -struct plpks_var_name_list { - u32 varcount; - struct plpks_var_name varlist[]; -}; - -/** - * Writes the specified var and its data to PKS. - * Any caller of PKS driver should present a valid component type for - * their variable. - */ -int plpks_write_var(struct plpks_var var); - -/** - * Removes the specified var and its data from PKS. - */ -int plpks_remove_var(char *component, u8 varos, - struct plpks_var_name vname); - -/** - * Returns the data for the specified os variable. - */ -int plpks_read_os_var(struct plpks_var *var); - -/** - * Returns the data for the specified firmware variable. - */ -int plpks_read_fw_var(struct plpks_var *var); - -/** - * Returns the data for the specified bootloader variable. - */ -int plpks_read_bootloader_var(struct plpks_var *var); - -#endif -- cgit v1.2.3 From 3def7a3e7c2ce2ab5e5c54561da7125206851be4 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:50 +1100 Subject: powerpc/pseries: Move PLPKS constants to header file Move the constants defined in plpks.c to plpks.h, and standardise their naming, so that PLPKS consumers can make use of them later on. Signed-off-by: Russell Currey Co-developed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-16-ajd@linux.ibm.com --- arch/powerpc/include/asm/plpks.h | 36 ++++++++++++++++++--- arch/powerpc/platforms/pseries/plpks.c | 57 +++++++++++++--------------------- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index 8295502ee93b..6466aadd7145 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -14,14 +14,40 @@ #include #include -#define OSSECBOOTAUDIT 0x40000000 -#define OSSECBOOTENFORCE 0x20000000 -#define WORLDREADABLE 0x08000000 -#define SIGNEDUPDATE 0x01000000 +// Object policy flags from supported_policies +#define PLPKS_OSSECBOOTAUDIT PPC_BIT32(1) // OS secure boot must be audit/enforce +#define PLPKS_OSSECBOOTENFORCE PPC_BIT32(2) // OS secure boot must be enforce +#define PLPKS_PWSET PPC_BIT32(3) // No access without password set +#define PLPKS_WORLDREADABLE PPC_BIT32(4) // Readable without authentication +#define PLPKS_IMMUTABLE PPC_BIT32(5) // Once written, object cannot be removed +#define PLPKS_TRANSIENT PPC_BIT32(6) // Object does not persist through reboot +#define PLPKS_SIGNEDUPDATE PPC_BIT32(7) // Object can only be modified by signed updates +#define PLPKS_HVPROVISIONED PPC_BIT32(28) // Hypervisor has provisioned this object -#define PLPKS_VAR_LINUX 0x02 +// Signature algorithm flags from signed_update_algorithms +#define PLPKS_ALG_RSA2048 PPC_BIT(0) +#define PLPKS_ALG_RSA4096 PPC_BIT(1) + +// Object label OS metadata flags +#define PLPKS_VAR_LINUX 0x02 #define PLPKS_VAR_COMMON 0x04 +// Flags for which consumer owns an object is owned by +#define PLPKS_FW_OWNER 0x1 +#define PLPKS_BOOTLOADER_OWNER 0x2 +#define PLPKS_OS_OWNER 0x3 + +// Flags for label metadata fields +#define 
PLPKS_LABEL_VERSION 0 +#define PLPKS_MAX_LABEL_ATTR_SIZE 16 +#define PLPKS_MAX_NAME_SIZE 239 +#define PLPKS_MAX_DATA_SIZE 4000 + +// Timeouts for PLPKS operations +#define PLPKS_MAX_TIMEOUT 5000 // msec +#define PLPKS_FLUSH_SLEEP 10 // msec +#define PLPKS_FLUSH_SLEEP_RANGE 400 + struct plpks_var { char *component; u8 *name; diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 13e6daadb179..91f3f623a2c7 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -20,19 +20,6 @@ #include #include -#define PKS_FW_OWNER 0x1 -#define PKS_BOOTLOADER_OWNER 0x2 -#define PKS_OS_OWNER 0x3 - -#define LABEL_VERSION 0 -#define MAX_LABEL_ATTR_SIZE 16 -#define MAX_NAME_SIZE 239 -#define MAX_DATA_SIZE 4000 - -#define PKS_FLUSH_MAX_TIMEOUT 5000 //msec -#define PKS_FLUSH_SLEEP 10 //msec -#define PKS_FLUSH_SLEEP_RANGE 400 - static u8 *ospassword; static u16 ospasswordlength; @@ -59,7 +46,7 @@ struct label_attr { struct label { struct label_attr attr; - u8 name[MAX_NAME_SIZE]; + u8 name[PLPKS_MAX_NAME_SIZE]; size_t size; }; @@ -122,7 +109,7 @@ static int pseries_status_to_err(int rc) static int plpks_gen_password(void) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; - u8 *password, consumer = PKS_OS_OWNER; + u8 *password, consumer = PLPKS_OS_OWNER; int rc; // The password must not cross a page boundary, so we align to the next power of 2 @@ -159,7 +146,7 @@ static struct plpks_auth *construct_auth(u8 consumer) { struct plpks_auth *auth; - if (consumer > PKS_OS_OWNER) + if (consumer > PLPKS_OS_OWNER) return ERR_PTR(-EINVAL); // The auth structure must not cross a page boundary and must be @@ -171,7 +158,7 @@ static struct plpks_auth *construct_auth(u8 consumer) auth->version = 1; auth->consumer = consumer; - if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) + if (consumer == PLPKS_FW_OWNER || consumer == PLPKS_BOOTLOADER_OWNER) return auth; memcpy(auth->password, ospassword, ospasswordlength); @@ -191,7 +178,7 @@ static struct label *construct_label(char *component, u8 varos, u8 *name, struct label *label; size_t slen; - if (!name || namelen > MAX_NAME_SIZE) + if (!name || namelen > PLPKS_MAX_NAME_SIZE) return ERR_PTR(-EINVAL); slen = strlen(component); @@ -206,9 +193,9 @@ static struct label *construct_label(char *component, u8 varos, u8 *name, if (component) memcpy(&label->attr.prefix, component, slen); - label->attr.version = LABEL_VERSION; + label->attr.version = PLPKS_LABEL_VERSION; label->attr.os = varos; - label->attr.length = MAX_LABEL_ATTR_SIZE; + label->attr.length = PLPKS_MAX_LABEL_ATTR_SIZE; memcpy(&label->name, name, namelen); label->size = sizeof(struct label_attr) + namelen; @@ -274,10 +261,10 @@ static int plpks_confirm_object_flushed(struct label *label, break; } - usleep_range(PKS_FLUSH_SLEEP, - PKS_FLUSH_SLEEP + PKS_FLUSH_SLEEP_RANGE); - timeout = timeout + PKS_FLUSH_SLEEP; - } while (timeout < PKS_FLUSH_MAX_TIMEOUT); + usleep_range(PLPKS_FLUSH_SLEEP, + PLPKS_FLUSH_SLEEP + PLPKS_FLUSH_SLEEP_RANGE); + timeout = timeout + PLPKS_FLUSH_SLEEP; + } while (timeout < PLPKS_MAX_TIMEOUT); if (timed_out) return -ETIMEDOUT; @@ -293,13 +280,13 @@ int plpks_write_var(struct plpks_var var) int rc; if (!var.component || !var.data || var.datalen <= 0 || - var.namelen > MAX_NAME_SIZE || var.datalen > MAX_DATA_SIZE) + var.namelen > PLPKS_MAX_NAME_SIZE || var.datalen > PLPKS_MAX_DATA_SIZE) return -EINVAL; - if (var.policy & SIGNEDUPDATE) + if (var.policy & PLPKS_SIGNEDUPDATE) return -EINVAL; - auth = 
construct_auth(PKS_OS_OWNER); + auth = construct_auth(PLPKS_OS_OWNER); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -331,10 +318,10 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) struct label *label; int rc; - if (!component || vname.namelen > MAX_NAME_SIZE) + if (!component || vname.namelen > PLPKS_MAX_NAME_SIZE) return -EINVAL; - auth = construct_auth(PKS_OS_OWNER); + auth = construct_auth(PLPKS_OS_OWNER); if (IS_ERR(auth)) return PTR_ERR(auth); @@ -366,14 +353,14 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) u8 *output; int rc; - if (var->namelen > MAX_NAME_SIZE) + if (var->namelen > PLPKS_MAX_NAME_SIZE) return -EINVAL; auth = construct_auth(consumer); if (IS_ERR(auth)) return PTR_ERR(auth); - if (consumer == PKS_OS_OWNER) { + if (consumer == PLPKS_OS_OWNER) { label = construct_label(var->component, var->os, var->name, var->namelen); if (IS_ERR(label)) { @@ -388,7 +375,7 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) goto out_free_label; } - if (consumer == PKS_OS_OWNER) + if (consumer == PLPKS_OS_OWNER) rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), virt_to_phys(label), label->size, virt_to_phys(output), maxobjsize); @@ -428,17 +415,17 @@ out_free_auth: int plpks_read_os_var(struct plpks_var *var) { - return plpks_read_var(PKS_OS_OWNER, var); + return plpks_read_var(PLPKS_OS_OWNER, var); } int plpks_read_fw_var(struct plpks_var *var) { - return plpks_read_var(PKS_FW_OWNER, var); + return plpks_read_var(PLPKS_FW_OWNER, var); } int plpks_read_bootloader_var(struct plpks_var *var) { - return plpks_read_var(PKS_BOOTLOADER_OWNER, var); + return plpks_read_var(PLPKS_BOOTLOADER_OWNER, var); } static __init int pseries_plpks_init(void) -- cgit v1.2.3 From 119da30d037dced29118fb90afe683ff50313386 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Fri, 10 Feb 2023 19:03:51 +1100 Subject: powerpc/pseries: Expose PLPKS config values, support additional fields The plpks driver uses the H_PKS_GET_CONFIG hcall to retrieve configuration and status information about the PKS from the hypervisor. Update _plpks_get_config() to handle some additional fields. Add getter functions to allow the PKS configuration information to be accessed from other files. Validate that the values we're getting comply with the spec. While we're here, move the config struct in _plpks_get_config() off the stack - it's getting large and we also need to make sure it doesn't cross a page boundary. Signed-off-by: Nayna Jain [ajd: split patch, extend to support additional v3 API fields, minor fixes] Co-developed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-17-ajd@linux.ibm.com --- arch/powerpc/include/asm/plpks.h | 58 +++++++++++++ arch/powerpc/platforms/pseries/plpks.c | 149 ++++++++++++++++++++++++++++++--- 2 files changed, 195 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index 6466aadd7145..7c5f51a9af7c 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -96,6 +96,64 @@ int plpks_read_fw_var(struct plpks_var *var); */ int plpks_read_bootloader_var(struct plpks_var *var); +/** + * Returns if PKS is available on this LPAR. + */ +bool plpks_is_available(void); + +/** + * Returns version of the Platform KeyStore. 
+ */ +u8 plpks_get_version(void); + +/** + * Returns hypervisor storage overhead per object, not including the size of + * the object or label. Only valid for config version >= 2 + */ +u16 plpks_get_objoverhead(void); + +/** + * Returns maximum password size. Must be >= 32 bytes + */ +u16 plpks_get_maxpwsize(void); + +/** + * Returns maximum object size supported by Platform KeyStore. + */ +u16 plpks_get_maxobjectsize(void); + +/** + * Returns maximum object label size supported by Platform KeyStore. + */ +u16 plpks_get_maxobjectlabelsize(void); + +/** + * Returns total size of the configured Platform KeyStore. + */ +u32 plpks_get_totalsize(void); + +/** + * Returns used space from the total size of the Platform KeyStore. + */ +u32 plpks_get_usedspace(void); + +/** + * Returns bitmask of policies supported by the hypervisor. + */ +u32 plpks_get_supportedpolicies(void); + +/** + * Returns maximum byte size of a single object supported by the hypervisor. + * Only valid for config version >= 3 + */ +u32 plpks_get_maxlargeobjectsize(void); + +/** + * Returns bitmask of signature algorithms supported for signed updates. + * Only valid for config version >= 3 + */ +u64 plpks_get_signedupdatealgorithms(void); + #endif // CONFIG_PSERIES_PLPKS #endif // _ASM_POWERPC_PLPKS_H diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 91f3f623a2c7..1189246b03dc 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -24,8 +24,16 @@ static u8 *ospassword; static u16 ospasswordlength; // Retrieved with H_PKS_GET_CONFIG +static u8 version; +static u16 objoverhead; static u16 maxpwsize; static u16 maxobjsize; +static s16 maxobjlabelsize; +static u32 totalsize; +static u32 usedspace; +static u32 supportedpolicies; +static u32 maxlargeobjectsize; +static u64 signedupdatealgorithms; struct plpks_auth { u8 version; @@ -206,32 +214,149 @@ static struct label *construct_label(char *component, u8 varos, u8 *name, static int _plpks_get_config(void) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; - struct { + struct config { u8 version; u8 flags; - __be32 rsvd0; + __be16 rsvd0; + __be16 objoverhead; __be16 maxpwsize; __be16 maxobjlabelsize; __be16 maxobjsize; __be32 totalsize; __be32 usedspace; __be32 supportedpolicies; - __be64 rsvd1; - } __packed config; + __be32 maxlargeobjectsize; + __be64 signedupdatealgorithms; + u8 rsvd1[476]; + } __packed * config; size_t size; - int rc; + int rc = 0; + + size = sizeof(*config); + + // Config struct must not cross a page boundary. 
So long as the struct + // size is a power of 2, this should be fine as alignment is guaranteed + config = kzalloc(size, GFP_KERNEL); + if (!config) { + rc = -ENOMEM; + goto err; + } + + rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(config), size); + + if (rc != H_SUCCESS) { + rc = pseries_status_to_err(rc); + goto err; + } + + version = config->version; + objoverhead = be16_to_cpu(config->objoverhead); + maxpwsize = be16_to_cpu(config->maxpwsize); + maxobjsize = be16_to_cpu(config->maxobjsize); + maxobjlabelsize = be16_to_cpu(config->maxobjlabelsize); + totalsize = be32_to_cpu(config->totalsize); + usedspace = be32_to_cpu(config->usedspace); + supportedpolicies = be32_to_cpu(config->supportedpolicies); + maxlargeobjectsize = be32_to_cpu(config->maxlargeobjectsize); + signedupdatealgorithms = be64_to_cpu(config->signedupdatealgorithms); + + // Validate that the numbers we get back match the requirements of the spec + if (maxpwsize < 32) { + pr_err("Invalid Max Password Size received from hypervisor (%d < 32)\n", maxpwsize); + rc = -EIO; + goto err; + } + + if (maxobjlabelsize < 255) { + pr_err("Invalid Max Object Label Size received from hypervisor (%d < 255)\n", + maxobjlabelsize); + rc = -EIO; + goto err; + } - size = sizeof(config); + if (totalsize < 4096) { + pr_err("Invalid Total Size received from hypervisor (%d < 4096)\n", totalsize); + rc = -EIO; + goto err; + } + + if (version >= 3 && maxlargeobjectsize >= 65536 && maxobjsize != 0xFFFF) { + pr_err("Invalid Max Object Size (0x%x != 0xFFFF)\n", maxobjsize); + rc = -EIO; + goto err; + } + +err: + kfree(config); + return rc; +} + +u8 plpks_get_version(void) +{ + return version; +} - rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(&config), size); +u16 plpks_get_objoverhead(void) +{ + return objoverhead; +} - if (rc != H_SUCCESS) - return pseries_status_to_err(rc); +u16 plpks_get_maxpwsize(void) +{ + return maxpwsize; +} - maxpwsize = be16_to_cpu(config.maxpwsize); - maxobjsize = be16_to_cpu(config.maxobjsize); +u16 plpks_get_maxobjectsize(void) +{ + return maxobjsize; +} + +u16 plpks_get_maxobjectlabelsize(void) +{ + return maxobjlabelsize; +} + +u32 plpks_get_totalsize(void) +{ + return totalsize; +} + +u32 plpks_get_usedspace(void) +{ + // Unlike other config values, usedspace regularly changes as objects + // are updated, so we need to refresh. + int rc = _plpks_get_config(); + if (rc) { + pr_err("Couldn't get config, rc: %d\n", rc); + return 0; + } + return usedspace; +} + +u32 plpks_get_supportedpolicies(void) +{ + return supportedpolicies; +} + +u32 plpks_get_maxlargeobjectsize(void) +{ + return maxlargeobjectsize; +} + +u64 plpks_get_signedupdatealgorithms(void) +{ + return signedupdatealgorithms; +} + +bool plpks_is_available(void) +{ + int rc; + + rc = _plpks_get_config(); + if (rc) + return false; - return 0; + return true; } static int plpks_confirm_object_flushed(struct label *label, -- cgit v1.2.3 From 899d9b8fee66da820eadc60b2a70090eb83db761 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Fri, 10 Feb 2023 19:03:52 +1100 Subject: powerpc/pseries: Implement signed update for PLPKS objects The Platform Keystore provides a signed update interface which can be used to create, replace or append to certain variables in the PKS in a secure fashion, with the hypervisor requiring that the update be signed using the Platform Key. Implement an interface to the H_PKS_SIGNED_UPDATE hcall in the plpks driver to allow signed updates to PKS objects. 
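For illustration only, a minimal consumer-side sketch (the helper name and the policy choice are made up here; the field requirements follow the checks added by this patch) would fill a plpks_var with a NULL component and a policy containing PLPKS_SIGNEDUPDATE, then pass the hypervisor-defined flags straight through:

	/* Hypothetical consumer sketch, not part of this series. */
	static int example_signed_update(u8 *name, u16 namelen,
					 u8 *payload, u16 payloadlen, u64 flags)
	{
		struct plpks_var var = {
			.component = NULL,		/* signed updates require a NULL component */
			.name      = name,
			.namelen   = namelen,		/* must not exceed PLPKS_MAX_NAME_SIZE */
			.os        = PLPKS_VAR_LINUX,
			.policy    = PLPKS_SIGNEDUPDATE | PLPKS_WORLDREADABLE,
			.data      = payload,		/* signed update payload built by userspace */
			.datalen   = payloadlen,
		};

		return plpks_signed_update_var(&var, flags);
	}

The payload itself stays opaque to the driver, consistent with the parenthetical note that follows.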
(The plpks driver doesn't need to do any cryptography or otherwise handle the actual signed variable contents - that will be handled by userspace tooling.) Signed-off-by: Nayna Jain [ajd: split patch, add timeout handling and misc cleanups] Co-developed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-18-ajd@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 1 + arch/powerpc/include/asm/plpks.h | 5 +++ arch/powerpc/platforms/pseries/plpks.c | 74 +++++++++++++++++++++++++++++++--- 3 files changed, 75 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 95fd7f9485d5..c099780385dd 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -335,6 +335,7 @@ #define H_RPT_INVALIDATE 0x448 #define H_SCM_FLUSH 0x44C #define H_GET_ENERGY_SCALE_INFO 0x450 +#define H_PKS_SIGNED_UPDATE 0x454 #define H_WATCHDOG 0x45C #define MAX_HCALL_OPCODE H_WATCHDOG diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index 7c5f51a9af7c..e7204e6c0ca4 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -68,6 +68,11 @@ struct plpks_var_name_list { struct plpks_var_name varlist[]; }; +/** + * Updates the authenticated variable. It expects NULL as the component. + */ +int plpks_signed_update_var(struct plpks_var *var, u64 flags); + /** * Writes the specified var and its data to PKS. * Any caller of PKS driver should present a valid component type for diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 1189246b03dc..cee06fb9a370 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -81,6 +81,12 @@ static int pseries_status_to_err(int rc) err = -ENOENT; break; case H_BUSY: + case H_LONG_BUSY_ORDER_1_MSEC: + case H_LONG_BUSY_ORDER_10_MSEC: + case H_LONG_BUSY_ORDER_100_MSEC: + case H_LONG_BUSY_ORDER_1_SEC: + case H_LONG_BUSY_ORDER_10_SEC: + case H_LONG_BUSY_ORDER_100_SEC: err = -EBUSY; break; case H_AUTHORITY: @@ -184,14 +190,17 @@ static struct label *construct_label(char *component, u8 varos, u8 *name, u16 namelen) { struct label *label; - size_t slen; + size_t slen = 0; if (!name || namelen > PLPKS_MAX_NAME_SIZE) return ERR_PTR(-EINVAL); - slen = strlen(component); - if (component && slen > sizeof(label->attr.prefix)) - return ERR_PTR(-EINVAL); + // Support NULL component for signed updates + if (component) { + slen = strlen(component); + if (slen > sizeof(label->attr.prefix)) + return ERR_PTR(-EINVAL); + } // The label structure must not cross a page boundary, so we align to the next power of 2 label = kzalloc(roundup_pow_of_two(sizeof(*label)), GFP_KERNEL); @@ -397,6 +406,61 @@ static int plpks_confirm_object_flushed(struct label *label, return pseries_status_to_err(rc); } +int plpks_signed_update_var(struct plpks_var *var, u64 flags) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + int rc; + struct label *label; + struct plpks_auth *auth; + u64 continuetoken = 0; + u64 timeout = 0; + + if (!var->data || var->datalen <= 0 || var->namelen > PLPKS_MAX_NAME_SIZE) + return -EINVAL; + + if (!(var->policy & PLPKS_SIGNEDUPDATE)) + return -EINVAL; + + // Signed updates need the component to be NULL. 
+ if (var->component) + return -EINVAL; + + auth = construct_auth(PLPKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(var->component, var->os, var->name, var->namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + do { + rc = plpar_hcall9(H_PKS_SIGNED_UPDATE, retbuf, + virt_to_phys(auth), virt_to_phys(label), + label->size, var->policy, flags, + virt_to_phys(var->data), var->datalen, + continuetoken); + + continuetoken = retbuf[0]; + if (pseries_status_to_err(rc) == -EBUSY) { + int delay_ms = get_longbusy_msecs(rc); + mdelay(delay_ms); + timeout += delay_ms; + } + rc = pseries_status_to_err(rc); + } while (rc == -EBUSY && timeout < PLPKS_MAX_TIMEOUT); + + if (!rc) + rc = plpks_confirm_object_flushed(label, auth); + + kfree(label); +out: + kfree(auth); + + return rc; +} + int plpks_write_var(struct plpks_var var) { unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; @@ -443,7 +507,7 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) struct label *label; int rc; - if (!component || vname.namelen > PLPKS_MAX_NAME_SIZE) + if (vname.namelen > PLPKS_MAX_NAME_SIZE) return -EINVAL; auth = construct_auth(PLPKS_OS_OWNER); -- cgit v1.2.3 From ebdcd42347157647ffe6c4d2808e4e5c146475d3 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:53 +1100 Subject: powerpc/pseries: Log hcall return codes for PLPKS debug The plpks code converts hypervisor return codes into their Linux equivalents so that users can understand them. Having access to the original return codes is really useful for debugging, so add a pr_debug() so we don't lose information from the conversion. Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-19-ajd@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index cee06fb9a370..e5755443d4a4 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -117,6 +117,8 @@ static int pseries_status_to_err(int rc) err = -EINVAL; } + pr_debug("Converted hypervisor code %d to Linux %d\n", rc, err); + return err; } -- cgit v1.2.3 From 0cf2cc1fe4e2e7a37da077cdd3fba5cfd9a6a36c Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:54 +1100 Subject: powerpc/pseries: Make caller pass buffer to plpks_read_var() Currently, plpks_read_var() allocates a buffer to pass to the H_PKS_READ_OBJECT hcall, then allocates another buffer into which the data is copied, and returns that buffer to the caller. This is a bit over the top - while we probably still want to allocate a separate buffer to pass to the hypervisor in the hcall, we can let the caller allocate the final buffer and specify the size. Don't allocate var->data in plpks_read_var(), instead expect the caller to allocate it. If the caller needs to discover the size, it can set var->data to NULL and var->datalen will be populated. Update header file to document this. 
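A minimal caller sketch of the new contract (the wrapper name is invented; var->name, var->namelen and var->os are assumed to be filled in already, and the usual kernel headers are assumed) reads in two steps, first querying the size and then supplying a buffer:

	static int example_read_os_object(struct plpks_var *var)
	{
		int rc;

		var->data = NULL;
		rc = plpks_read_os_var(var);	/* only populates var->datalen */
		if (rc)
			return rc;

		var->data = kzalloc(var->datalen, GFP_KERNEL);
		if (!var->data)
			return -ENOMEM;

		rc = plpks_read_os_var(var);	/* copies into the caller's buffer */
		if (rc) {
			kfree(var->data);
			var->data = NULL;
		}
		return rc;
	}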
Suggested-by: Michael Ellerman Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-20-ajd@linux.ibm.com --- arch/powerpc/include/asm/plpks.h | 12 ++++++++++++ arch/powerpc/platforms/pseries/plpks.c | 11 ++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index e7204e6c0ca4..0c49969b0864 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -88,16 +88,28 @@ int plpks_remove_var(char *component, u8 varos, /** * Returns the data for the specified os variable. + * + * Caller must allocate a buffer in var->data with length in var->datalen. + * If no buffer is provided, var->datalen will be populated with the object's + * size. */ int plpks_read_os_var(struct plpks_var *var); /** * Returns the data for the specified firmware variable. + * + * Caller must allocate a buffer in var->data with length in var->datalen. + * If no buffer is provided, var->datalen will be populated with the object's + * size. */ int plpks_read_fw_var(struct plpks_var *var); /** * Returns the data for the specified bootloader variable. + * + * Caller must allocate a buffer in var->data with length in var->datalen. + * If no buffer is provided, var->datalen will be populated with the object's + * size. */ int plpks_read_bootloader_var(struct plpks_var *var); diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index e5755443d4a4..926b6a927326 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -581,17 +581,14 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var) goto out_free_output; } - if (var->datalen == 0 || var->datalen > retbuf[0]) + if (!var->data || var->datalen > retbuf[0]) var->datalen = retbuf[0]; - var->data = kzalloc(var->datalen, GFP_KERNEL); - if (!var->data) { - rc = -ENOMEM; - goto out_free_output; - } var->policy = retbuf[1]; - memcpy(var->data, output, var->datalen); + if (var->data) + memcpy(var->data, output, var->datalen); + rc = 0; out_free_output: -- cgit v1.2.3 From 46b2cbebac1e862e4c8317aa26e7d7d632242c2f Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:55 +1100 Subject: powerpc/pseries: Turn PSERIES_PLPKS into a hidden option It seems a bit unnecessary for the PLPKS code to have a user-visible config option when it doesn't do anything on its own, and there's existing options for enabling Secure Boot-related features. It should be enabled by PPC_SECURE_BOOT, which will eventually be what uses PLPKS to populate keyrings. However, we can't get of the separate option completely, because it will also be used for SED Opal purposes. Change PSERIES_PLPKS into a hidden option, which is selected by PPC_SECURE_BOOT. 
Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-21-ajd@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/platforms/pseries/Kconfig | 19 +++++++++---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 006f019fde63..2c9cdf1d8761 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1042,6 +1042,7 @@ config PPC_SECURE_BOOT depends on PPC_POWERNV || PPC_PSERIES depends on IMA_ARCH_POLICY imply IMA_SECURE_AND_OR_TRUSTED_BOOT + select PSERIES_PLPKS if PPC_PSERIES help Systems with firmware secure boot enabled need to define security policies to extend secure boot to the OS. This config allows a user diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index a3b4d99567cb..e51d65969318 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -151,16 +151,15 @@ config IBMEBUS config PSERIES_PLPKS depends on PPC_PSERIES - bool "Support for the Platform Key Storage" - help - PowerVM provides an isolated Platform Keystore(PKS) storage - allocation for each LPAR with individually managed access - controls to store sensitive information securely. It can be - used to store asymmetric public keys or secrets as required - by different usecases. Select this config to enable - operating system interface to hypervisor to access this space. - - If unsure, select N. + bool + # PowerVM provides an isolated Platform Keystore (PKS) storage + # allocation for each LPAR with individually managed access + # controls to store sensitive information securely. It can be + # used to store asymmetric public keys or secrets as required + # by different usecases. + # + # This option is selected by in-kernel consumers that require + # access to the PKS. config PAPR_SCM depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM -- cgit v1.2.3 From ca4f1d221c84fe364517b15af65f3f0e4ce9719a Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 10 Feb 2023 19:03:56 +1100 Subject: powerpc/pseries: Clarify warning when PLPKS password already set When the H_PKS_GEN_PASSWORD hcall returns H_IN_USE, operations that require authentication (i.e. anything other than reading a world-readable variable) will not work. The current error message doesn't explain this clearly enough. Reword it to emphasise that authenticated operations will fail. 
Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-22-ajd@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 926b6a927326..01ae919b4497 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -146,7 +146,7 @@ static int plpks_gen_password(void) memcpy(ospassword, password, ospasswordlength); } else { if (rc == H_IN_USE) { - pr_warn("Password is already set for POWER LPAR Platform KeyStore\n"); + pr_warn("Password already set - authenticated operations will fail\n"); rc = 0; } else { goto out; -- cgit v1.2.3 From 9ee76bd5c7e39b622660cc14833ead1967f2038d Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:57 +1100 Subject: powerpc/pseries: Add helper to get PLPKS password length Add helper function to get the PLPKS password length. This will be used in a later patch to support passing the password between kernels over kexec. Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-23-ajd@linux.ibm.com --- arch/powerpc/include/asm/plpks.h | 5 +++++ arch/powerpc/platforms/pseries/plpks.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index 0c49969b0864..757313e00521 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -171,6 +171,11 @@ u32 plpks_get_maxlargeobjectsize(void); */ u64 plpks_get_signedupdatealgorithms(void); +/** + * Returns the length of the PLPKS password in bytes. + */ +u16 plpks_get_passwordlen(void); + #endif // CONFIG_PSERIES_PLPKS #endif // _ASM_POWERPC_PLPKS_H diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 01ae919b4497..671a10acaebf 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -359,6 +359,11 @@ u64 plpks_get_signedupdatealgorithms(void) return signedupdatealgorithms; } +u16 plpks_get_passwordlen(void) +{ + return ospasswordlength; +} + bool plpks_is_available(void) { int rc; -- cgit v1.2.3 From 91361b5175d2b3704f7e436d0071893c839e1199 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:58 +1100 Subject: powerpc/pseries: Pass PLPKS password on kexec Before interacting with the PLPKS, we ask the hypervisor to generate a password for the current boot, which is then required for most further PLPKS operations. If we kexec into a new kernel, the new kernel will try and fail to generate a new password, as the password has already been set. Pass the password through to the new kernel via the device tree, in /chosen/ibm,plpks-pw. Check for the presence of this property before trying to generate a new password - if it exists, use the existing password and remove it from the device tree. This only works with the kexec_file_load() syscall, not the older kexec_load() syscall, however if you're using Secure Boot then you want to be using kexec_file_load() anyway. 
Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-24-ajd@linux.ibm.com --- arch/powerpc/include/asm/plpks.h | 14 ++++++++ arch/powerpc/kernel/prom.c | 4 +++ arch/powerpc/kexec/file_load_64.c | 18 +++++++--- arch/powerpc/platforms/pseries/plpks.c | 61 ++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index 757313e00521..23b77027c916 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -176,6 +176,20 @@ u64 plpks_get_signedupdatealgorithms(void); */ u16 plpks_get_passwordlen(void); +/** + * Called in early init to retrieve and clear the PLPKS password from the DT. + */ +void plpks_early_init_devtree(void); + +/** + * Populates the FDT with the PLPKS password to prepare for kexec. + */ +int plpks_populate_fdt(void *fdt); +#else // CONFIG_PSERIES_PLPKS +static inline bool plpks_is_available(void) { return false; } +static inline u16 plpks_get_passwordlen(void) { BUILD_BUG(); } +static inline void plpks_early_init_devtree(void) { } +static inline int plpks_populate_fdt(void *fdt) { BUILD_BUG(); } #endif // CONFIG_PSERIES_PLPKS #endif // _ASM_POWERPC_PLPKS_H diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f318e8e1f3fe..9d9ee4e9e1a1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -887,6 +888,9 @@ void __init early_init_devtree(void *params) powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; #endif + /* If kexec left a PLPKS password in the DT, get it and clear it */ + plpks_early_init_devtree(); + tm_init(); DBG(" <- early_init_devtree()\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 9be3e818a240..ab80c492da31 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -28,6 +28,7 @@ #include #include #include +#include struct umem_info { u64 *buf; /* data buffer for usable-memory property */ @@ -978,12 +979,17 @@ static unsigned int cpu_node_size(void) */ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) { - unsigned int cpu_nodes, extra_size; + unsigned int cpu_nodes, extra_size = 0; struct device_node *dn; u64 usm_entries; + // Budget some space for the password blob. 
There's already extra space + // for the key name + if (plpks_is_available()) + extra_size += (unsigned int)plpks_get_passwordlen(); + if (image->type != KEXEC_TYPE_CRASH) - return 0; + return extra_size; /* * For kdump kernel, account for linux,usable-memory and @@ -993,9 +999,7 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) if (drmem_lmb_size()) { usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); - extra_size = (unsigned int)(usm_entries * sizeof(u64)); - } else { - extra_size = 0; + extra_size += (unsigned int)(usm_entries * sizeof(u64)); } /* @@ -1234,6 +1238,10 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, } } + // If we have PLPKS active, we need to provide the password to the new kernel + if (plpks_is_available()) + ret = plpks_populate_fdt(fdt); + out: kfree(rmem); kfree(umem); diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index 671a10acaebf..cdf09e5bd741 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -16,6 +16,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -128,6 +131,12 @@ static int plpks_gen_password(void) u8 *password, consumer = PLPKS_OS_OWNER; int rc; + // If we booted from kexec, we could be reusing an existing password already + if (ospassword) { + pr_debug("Password of length %u already in use\n", ospasswordlength); + return 0; + } + // The password must not cross a page boundary, so we align to the next power of 2 password = kzalloc(roundup_pow_of_two(maxpwsize), GFP_KERNEL); if (!password) @@ -621,6 +630,58 @@ int plpks_read_bootloader_var(struct plpks_var *var) return plpks_read_var(PLPKS_BOOTLOADER_OWNER, var); } +int plpks_populate_fdt(void *fdt) +{ + int chosen_offset = fdt_path_offset(fdt, "/chosen"); + + if (chosen_offset < 0) { + pr_err("Can't find chosen node: %s\n", + fdt_strerror(chosen_offset)); + return chosen_offset; + } + + return fdt_setprop(fdt, chosen_offset, "ibm,plpks-pw", ospassword, ospasswordlength); +} + +// Once a password is registered with the hypervisor it cannot be cleared without +// rebooting the LPAR, so to keep using the PLPKS across kexec boots we need to +// recover the previous password from the FDT. +// +// There are a few challenges here. We don't want the password to be visible to +// users, so we need to clear it from the FDT. This has to be done in early boot. +// Clearing it from the FDT would make the FDT's checksum invalid, so we have to +// manually cause the checksum to be recalculated. 
+void __init plpks_early_init_devtree(void) +{ + void *fdt = initial_boot_params; + int chosen_node = fdt_path_offset(fdt, "/chosen"); + const u8 *password; + int len; + + if (chosen_node < 0) + return; + + password = fdt_getprop(fdt, chosen_node, "ibm,plpks-pw", &len); + if (len <= 0) { + pr_debug("Couldn't find ibm,plpks-pw node.\n"); + return; + } + + ospassword = memblock_alloc_raw(len, SMP_CACHE_BYTES); + if (!ospassword) { + pr_err("Error allocating memory for password.\n"); + goto out; + } + + memcpy(ospassword, password, len); + ospasswordlength = (u16)len; + +out: + fdt_nop_property(fdt, chosen_node, "ibm,plpks-pw"); + // Since we've cleared the password, we must update the FDT checksum + early_init_dt_verify(fdt); +} + static __init int pseries_plpks_init(void) { int rc; -- cgit v1.2.3 From ccadf154cb00b9ee9618d209aa3efc54b35a34b4 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:03:59 +1100 Subject: powerpc/pseries: Implement secvars for dynamic secure boot The pseries platform can support dynamic secure boot (i.e. secure boot using user-defined keys) using variables contained with the PowerVM LPAR Platform KeyStore (PLPKS). Using the powerpc secvar API, expose the relevant variables for pseries dynamic secure boot through the existing secvar filesystem layout. The relevant variables for dynamic secure boot are signed in the keystore, and can only be modified using the H_PKS_SIGNED_UPDATE hcall. Object labels in the keystore are encoded using ucs2 format. With our fixed variable names we don't have to care about encoding outside of the necessary byte padding. When a user writes to a variable, the first 8 bytes of data must contain the signed update flags as defined by the hypervisor. When a user reads a variable, the first 4 bytes of data contain the policies defined for the object. Limitations exist due to the underlying implementation of sysfs binary attributes, as is the case for the OPAL secvar implementation - partial writes are unsupported and writes cannot be larger than PAGE_SIZE. (Even when using bin_attributes, which can be larger than a single page, sysfs only gives us one page's worth of write buffer at a time, and the hypervisor does not expose an interface for partial writes.) Co-developed-by: Nayna Jain Signed-off-by: Nayna Jain Co-developed-by: Andrew Donnellan Signed-off-by: Andrew Donnellan Signed-off-by: Russell Currey [mpe: Add NLS dependency to fix build errors, squash fix from ajd] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-25-ajd@linux.ibm.com --- Documentation/ABI/testing/sysfs-secvar | 75 ++++++++- arch/powerpc/platforms/pseries/Kconfig | 1 + arch/powerpc/platforms/pseries/Makefile | 4 +- arch/powerpc/platforms/pseries/plpks-secvar.c | 216 ++++++++++++++++++++++++++ 4 files changed, 293 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/platforms/pseries/plpks-secvar.c diff --git a/Documentation/ABI/testing/sysfs-secvar b/Documentation/ABI/testing/sysfs-secvar index feebb8c57294..857cf12b0904 100644 --- a/Documentation/ABI/testing/sysfs-secvar +++ b/Documentation/ABI/testing/sysfs-secvar @@ -18,6 +18,14 @@ Description: A string indicating which backend is in use by the firmware. This determines the format of the variable and the accepted format of variable updates. + On powernv/OPAL, this value is provided by the OPAL firmware + and is expected to be "ibm,edk2-compat-v1". 
+ + On pseries/PLPKS, this is generated by the kernel based on the + version number in the SB_VERSION variable in the keystore, and + has the form "ibm,plpks-sb-v", or + "ibm,plpks-sb-unknown" if there is no SB_VERSION variable. + What: /sys/firmware/secvar/vars/ Date: August 2019 Contact: Nayna Jain @@ -34,7 +42,7 @@ Description: An integer representation of the size of the content of the What: /sys/firmware/secvar/vars//data Date: August 2019 -Contact: Nayna Jain h +Contact: Nayna Jain Description: A read-only file containing the value of the variable. The size of the file represents the maximum size of the variable data. @@ -44,3 +52,68 @@ Contact: Nayna Jain Description: A write-only file that is used to submit the new value for the variable. The size of the file represents the maximum size of the variable data that can be written. + +What: /sys/firmware/secvar/config +Date: February 2023 +Contact: Nayna Jain +Description: This optional directory contains read-only config attributes as + defined by the secure variable implementation. All data is in + ASCII format. The directory is only created if the backing + implementation provides variables to populate it, which at + present is only PLPKS on the pseries platform. + +What: /sys/firmware/secvar/config/version +Date: February 2023 +Contact: Nayna Jain +Description: Config version as reported by the hypervisor in ASCII decimal + format. + + Currently only provided by PLPKS on the pseries platform. + +What: /sys/firmware/secvar/config/max_object_size +Date: February 2023 +Contact: Nayna Jain +Description: Maximum allowed size of objects in the keystore in bytes, + represented in ASCII decimal format. + + This is not necessarily the same as the max size that can be + written to an update file as writes can contain more than + object data, you should use the size of the update file for + that purpose. + + Currently only provided by PLPKS on the pseries platform. + +What: /sys/firmware/secvar/config/total_size +Date: February 2023 +Contact: Nayna Jain +Description: Total size of the PLPKS in bytes, represented in ASCII decimal + format. + + Currently only provided by PLPKS on the pseries platform. + +What: /sys/firmware/secvar/config/used_space +Date: February 2023 +Contact: Nayna Jain +Description: Current space consumed by the key store, in bytes, represented + in ASCII decimal format. + + Currently only provided by PLPKS on the pseries platform. + +What: /sys/firmware/secvar/config/supported_policies +Date: February 2023 +Contact: Nayna Jain +Description: Bitmask of supported policy flags by the hypervisor, + represented as an 8 byte hexadecimal ASCII string. Consult the + hypervisor documentation for what these flags are. + + Currently only provided by PLPKS on the pseries platform. + +What: /sys/firmware/secvar/config/signed_update_algorithms +Date: February 2023 +Contact: Nayna Jain +Description: Bitmask of flags indicating which algorithms the hypervisor + supports for signed update of objects, represented as a 16 byte + hexadecimal ASCII string. Consult the hypervisor documentation + for what these flags mean. + + Currently only provided by PLPKS on the pseries platform. 
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index e51d65969318..b481c5c8bae1 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -151,6 +151,7 @@ config IBMEBUS config PSERIES_PLPKS depends on PPC_PSERIES + select NLS bool # PowerVM provides an isolated Platform Keystore (PKS) storage # allocation for each LPAR with individually managed access diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 92310202bdd7..20a0f3c3fe04 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -27,8 +27,8 @@ obj-$(CONFIG_PAPR_SCM) += papr_scm.o obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o -obj-$(CONFIG_PSERIES_PLPKS) += plpks.o - +obj-$(CONFIG_PSERIES_PLPKS) += plpks.o +obj-$(CONFIG_PPC_SECURE_BOOT) += plpks-secvar.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o diff --git a/arch/powerpc/platforms/pseries/plpks-secvar.c b/arch/powerpc/platforms/pseries/plpks-secvar.c new file mode 100644 index 000000000000..f6c8888f4076 --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks-secvar.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0-only + +// Secure variable implementation using the PowerVM LPAR Platform KeyStore (PLPKS) +// +// Copyright 2022, 2023 IBM Corporation +// Authors: Russell Currey +// Andrew Donnellan +// Nayna Jain + +#define pr_fmt(fmt) "secvar: "fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Config attributes for sysfs +#define PLPKS_CONFIG_ATTR(name, fmt, func) \ + static ssize_t name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ + { \ + return sysfs_emit(buf, fmt, func()); \ + } \ + static struct kobj_attribute attr_##name = __ATTR_RO(name) + +PLPKS_CONFIG_ATTR(version, "%u\n", plpks_get_version); +PLPKS_CONFIG_ATTR(max_object_size, "%u\n", plpks_get_maxobjectsize); +PLPKS_CONFIG_ATTR(total_size, "%u\n", plpks_get_totalsize); +PLPKS_CONFIG_ATTR(used_space, "%u\n", plpks_get_usedspace); +PLPKS_CONFIG_ATTR(supported_policies, "%08x\n", plpks_get_supportedpolicies); +PLPKS_CONFIG_ATTR(signed_update_algorithms, "%016llx\n", plpks_get_signedupdatealgorithms); + +static const struct attribute *config_attrs[] = { + &attr_version.attr, + &attr_max_object_size.attr, + &attr_total_size.attr, + &attr_used_space.attr, + &attr_supported_policies.attr, + &attr_signed_update_algorithms.attr, + NULL, +}; + +static u32 get_policy(const char *name) +{ + if ((strcmp(name, "db") == 0) || + (strcmp(name, "dbx") == 0) || + (strcmp(name, "grubdb") == 0) || + (strcmp(name, "grubdbx") == 0) || + (strcmp(name, "sbat") == 0)) + return (PLPKS_WORLDREADABLE | PLPKS_SIGNEDUPDATE); + else + return PLPKS_SIGNEDUPDATE; +} + +static const char * const plpks_var_names[] = { + "PK", + "KEK", + "db", + "dbx", + "grubdb", + "grubdbx", + "sbat", + "moduledb", + "trustedcadb", + NULL, +}; + +static int plpks_get_variable(const char *key, u64 key_len, u8 *data, + u64 *data_size) +{ + struct plpks_var var = {0}; + int rc = 0; + + // We subtract 1 from key_len because we don't need to include the + // null terminator at the end of the string + var.name = kcalloc(key_len - 1, sizeof(wchar_t), GFP_KERNEL); + if (!var.name) + return -ENOMEM; + rc = utf8s_to_utf16s(key, key_len - 1, UTF16_LITTLE_ENDIAN, (wchar_t *)var.name, + key_len - 1); + if (rc < 0) + 
goto err; + var.namelen = rc * 2; + + var.os = PLPKS_VAR_LINUX; + if (data) { + var.data = data; + var.datalen = *data_size; + } + rc = plpks_read_os_var(&var); + + if (rc) + goto err; + + *data_size = var.datalen; + +err: + kfree(var.name); + if (rc && rc != -ENOENT) { + pr_err("Failed to read variable '%s': %d\n", key, rc); + // Return -EIO since userspace probably doesn't care about the + // specific error + rc = -EIO; + } + return rc; +} + +static int plpks_set_variable(const char *key, u64 key_len, u8 *data, + u64 data_size) +{ + struct plpks_var var = {0}; + int rc = 0; + u64 flags; + + // Secure variables need to be prefixed with 8 bytes of flags. + // We only want to perform the write if we have at least one byte of data. + if (data_size <= sizeof(flags)) + return -EINVAL; + + // We subtract 1 from key_len because we don't need to include the + // null terminator at the end of the string + var.name = kcalloc(key_len - 1, sizeof(wchar_t), GFP_KERNEL); + if (!var.name) + return -ENOMEM; + rc = utf8s_to_utf16s(key, key_len - 1, UTF16_LITTLE_ENDIAN, (wchar_t *)var.name, + key_len - 1); + if (rc < 0) + goto err; + var.namelen = rc * 2; + + memcpy(&flags, data, sizeof(flags)); + + var.datalen = data_size - sizeof(flags); + var.data = data + sizeof(flags); + var.os = PLPKS_VAR_LINUX; + var.policy = get_policy(key); + + // Unlike in the read case, the plpks error code can be useful to + // userspace on write, so we return it rather than just -EIO + rc = plpks_signed_update_var(&var, flags); + +err: + kfree(var.name); + return rc; +} + +// PLPKS dynamic secure boot doesn't give us a format string in the same way OPAL does. +// Instead, report the format using the SB_VERSION variable in the keystore. +// The string is made up by us, and takes the form "ibm,plpks-sb-v" (or "ibm,plpks-sb-unknown" +// if the SB_VERSION variable doesn't exist). Hypervisor defines the SB_VERSION variable as a +// "1 byte unsigned integer value". +static ssize_t plpks_secvar_format(char *buf, size_t bufsize) +{ + struct plpks_var var = {0}; + ssize_t ret; + u8 version; + + var.component = NULL; + // Only the signed variables have null bytes in their names, this one doesn't + var.name = "SB_VERSION"; + var.namelen = strlen(var.name); + var.datalen = 1; + var.data = &version; + + // Unlike the other vars, SB_VERSION is owned by firmware instead of the OS + ret = plpks_read_fw_var(&var); + if (ret) { + if (ret == -ENOENT) { + ret = snprintf(buf, bufsize, "ibm,plpks-sb-unknown"); + } else { + pr_err("Error %ld reading SB_VERSION from firmware\n", ret); + ret = -EIO; + } + goto err; + } + + ret = snprintf(buf, bufsize, "ibm,plpks-sb-v%hhu", version); +err: + return ret; +} + +static int plpks_max_size(u64 *max_size) +{ + // The max object size reported by the hypervisor is accurate for the + // object itself, but we use the first 8 bytes of data on write as the + // signed update flags, so the max size a user can write is larger. 
+ *max_size = (u64)plpks_get_maxobjectsize() + sizeof(u64); + + return 0; +} + + +static const struct secvar_operations plpks_secvar_ops = { + .get = plpks_get_variable, + .set = plpks_set_variable, + .format = plpks_secvar_format, + .max_size = plpks_max_size, + .config_attrs = config_attrs, + .var_names = plpks_var_names, +}; + +static int plpks_secvar_init(void) +{ + if (!plpks_is_available()) + return -ENODEV; + + return set_secvar_ops(&plpks_secvar_ops); +} +machine_device_initcall(pseries, plpks_secvar_init); -- cgit v1.2.3 From 3c8069b0c3832674abd80a5cf019c913e62de9a5 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:04:00 +1100 Subject: integrity/powerpc: Improve error handling & reporting when loading certs A few improvements to load_powerpc.c: - include integrity.h for the pr_fmt() - move all error reporting out of get_cert_list() - use ERR_PTR() to better preserve error detail - don't use pr_err() for missing keys Reviewed-by: Mimi Zohar Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-26-ajd@linux.ibm.com --- security/integrity/platform_certs/load_powerpc.c | 26 ++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/security/integrity/platform_certs/load_powerpc.c b/security/integrity/platform_certs/load_powerpc.c index 1e4f80a4e71c..dee51606d5f4 100644 --- a/security/integrity/platform_certs/load_powerpc.c +++ b/security/integrity/platform_certs/load_powerpc.c @@ -14,9 +14,15 @@ #include #include #include "keyring_handler.h" +#include "../integrity.h" /* * Get a certificate list blob from the named secure variable. + * + * Returns: + * - a pointer to a kmalloc'd buffer containing the cert list on success + * - NULL if the key does not exist + * - an ERR_PTR on error */ static __init void *get_cert_list(u8 *key, unsigned long keylen, u64 *size) { @@ -25,19 +31,19 @@ static __init void *get_cert_list(u8 *key, unsigned long keylen, u64 *size) rc = secvar_ops->get(key, keylen, NULL, size); if (rc) { - pr_err("Couldn't get size: %d\n", rc); - return NULL; + if (rc == -ENOENT) + return NULL; + return ERR_PTR(rc); } db = kmalloc(*size, GFP_KERNEL); if (!db) - return NULL; + return ERR_PTR(-ENOMEM); rc = secvar_ops->get(key, keylen, db, size); if (rc) { kfree(db); - pr_err("Error reading %s var: %d\n", key, rc); - return NULL; + return ERR_PTR(rc); } return db; @@ -69,7 +75,11 @@ static int __init load_powerpc_certs(void) */ db = get_cert_list("db", 3, &dbsize); if (!db) { - pr_err("Couldn't get db list from firmware\n"); + pr_info("Couldn't get db list from firmware\n"); + } else if (IS_ERR(db)) { + rc = PTR_ERR(db); + pr_err("Error reading db from firmware: %d\n", rc); + return rc; } else { rc = parse_efi_signature_list("powerpc:db", db, dbsize, get_handler_for_db); @@ -81,6 +91,10 @@ static int __init load_powerpc_certs(void) dbx = get_cert_list("dbx", 4, &dbxsize); if (!dbx) { pr_info("Couldn't get dbx list from firmware\n"); + } else if (IS_ERR(dbx)) { + rc = PTR_ERR(dbx); + pr_err("Error reading dbx from firmware: %d\n", rc); + return rc; } else { rc = parse_efi_signature_list("powerpc:dbx", dbx, dbxsize, get_handler_for_dbx); -- cgit v1.2.3 From 4b3e71e9a34c48f370b6281e9477515d588e7b26 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 10 Feb 2023 19:04:01 +1100 Subject: integrity/powerpc: Support loading keys from PLPKS Add support for loading keys from the PLPKS on pseries machines, with the "ibm,plpks-sb-v1" format. 
The object format is expected to be the same, so there shouldn't be any functional differences between objects retrieved on powernv or pseries. Unlike on powernv, on pseries the format string isn't contained in the device tree. Use secvar_ops->format() to fetch the format string in a generic manner, rather than searching the device tree ourselves. (The current code searches the device tree for a node compatible with "ibm,edk2-compat-v1". This patch switches to calling secvar_ops->format(), which in the case of OPAL/powernv means opal_secvar_format(), which searches the device tree for a node compatible with "ibm,secvar-backend" and checks its "format" property. These are equivalent, as skiboot creates a node with both "ibm,edk2-compat-v1" and "ibm,secvar-backend" as compatible strings.) Signed-off-by: Russell Currey Signed-off-by: Andrew Donnellan Reviewed-by: Stefan Berger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210080401.345462-27-ajd@linux.ibm.com --- security/integrity/platform_certs/load_powerpc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/security/integrity/platform_certs/load_powerpc.c b/security/integrity/platform_certs/load_powerpc.c index dee51606d5f4..b9de70b90826 100644 --- a/security/integrity/platform_certs/load_powerpc.c +++ b/security/integrity/platform_certs/load_powerpc.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include "keyring_handler.h" @@ -59,16 +58,22 @@ static int __init load_powerpc_certs(void) void *db = NULL, *dbx = NULL; u64 dbsize = 0, dbxsize = 0; int rc = 0; - struct device_node *node; + ssize_t len; + char buf[32]; if (!secvar_ops) return -ENODEV; - /* The following only applies for the edk2-compat backend. */ - node = of_find_compatible_node(NULL, NULL, "ibm,edk2-compat-v1"); - if (!node) + len = secvar_ops->format(buf, sizeof(buf)); + if (len <= 0) return -ENODEV; + // Check for known secure boot implementations from OPAL or PLPKS + if (strcmp("ibm,edk2-compat-v1", buf) && strcmp("ibm,plpks-sb-v1", buf)) { + pr_err("Unsupported secvar implementation \"%s\", not loading certs\n", buf); + return -ENODEV; + } + /* * Get db, and dbx. They might not exist, so it isn't an error if we * can't get them. @@ -103,8 +108,6 @@ static int __init load_powerpc_certs(void) kfree(dbx); } - of_node_put(node); - return rc; } late_initcall(load_powerpc_certs); -- cgit v1.2.3 From 09d1ea72c88198ef5a9e6b8208f544fe18acbff1 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:49 -0600 Subject: powerpc/rtas: handle extended delays safely in early boot Some code that runs early in boot calls RTAS functions that can return -2 or 990x statuses, which mean the caller should retry. An example is pSeries_cmo_feature_init(), which invokes ibm,get-system-parameter but treats these benign statuses as errors instead of retrying. pSeries_cmo_feature_init() and similar code should be made to retry until they succeed or receive a real error, using the usual pattern: do { rc = rtas_call(token, etc...); } while (rtas_busy_delay(rc)); But rtas_busy_delay() will perform a timed sleep on any 990x status. This isn't safe so early in boot, before the CPU scheduler and timer subsystem have initialized. The -2 RTAS status is much more likely to occur during single-threaded boot than 990x in practice, at least on PowerVM. 
This is because -2 usually means that RTAS made progress but exhausted its self-imposed timeslice, while 990x is associated with concurrent requests from the OS causing internal contention. Regardless, according to the language in PAPR, the OS should be prepared to handle either type of status at any time. Add a fallback path to rtas_busy_delay() to handle this as safely as possible, performing a small delay on 990x. Include a counter to detect retry loops that aren't making progress and bail out. Add __ref to rtas_busy_delay() since it now conditionally calls an __init function. This was found by inspection and I'm not aware of any real failures. However, the implementation of rtas_busy_delay() before commit 38f7b7067dae ("powerpc/rtas: rtas_busy_delay() improvements") was not susceptible to this problem, so let's treat this as a regression. Signed-off-by: Nathan Lynch Fixes: 38f7b7067dae ("powerpc/rtas: rtas_busy_delay() improvements") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-1-26929c8cce78@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 49 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 795225d7f138..86aff1cb8a0d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -606,6 +606,47 @@ unsigned int rtas_busy_delay_time(int status) return ms; } +/* + * Early boot fallback for rtas_busy_delay(). + */ +static bool __init rtas_busy_delay_early(int status) +{ + static size_t successive_ext_delays __initdata; + bool retry; + + switch (status) { + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + /* + * In the unlikely case that we receive an extended + * delay status in early boot, the OS is probably not + * the cause, and there's nothing we can do to clear + * the condition. Best we can do is delay for a bit + * and hope it's transient. Lie to the caller if it + * seems like we're stuck in a retry loop. + */ + mdelay(1); + retry = true; + successive_ext_delays += 1; + if (successive_ext_delays > 1000) { + pr_err("too many extended delays, giving up\n"); + dump_stack(); + retry = false; + successive_ext_delays = 0; + } + break; + case RTAS_BUSY: + retry = true; + successive_ext_delays = 0; + break; + default: + retry = false; + successive_ext_delays = 0; + break; + } + + return retry; +} + /** * rtas_busy_delay() - helper for RTAS busy and extended delay statuses * @@ -624,11 +665,17 @@ unsigned int rtas_busy_delay_time(int status) * * false - @status is not @RTAS_BUSY nor an extended delay hint. The * caller is responsible for handling @status. */ -bool rtas_busy_delay(int status) +bool __ref rtas_busy_delay(int status) { unsigned int ms; bool ret; + /* + * Can't do timed sleeps before timekeeping is up. + */ + if (system_state < SYSTEM_SCHEDULING) + return rtas_busy_delay_early(status); + switch (status) { case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: ret = true; -- cgit v1.2.3 From cc4b26eab1859fa1a70711872caaf6414809973f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:50 -0600 Subject: powerpc/perf/hv-24x7: add missing RTAS retry status handling The ibm,get-system-parameter RTAS function may return -2 or 990x, which indicate that the caller should try again. read_24x7_sys_info() ignores this, allowing transient failures in reporting processor module information. 
Move the RTAS call into a coventional rtas_busy_delay()-based loop, along with the parsing of results on success. Signed-off-by: Nathan Lynch Fixes: 8ba214267382 ("powerpc/hv-24x7: Add rtas call in hv-24x7 driver to get processor details") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-2-26929c8cce78@linux.ibm.com --- arch/powerpc/perf/hv-24x7.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 8ce56837961e..5a8577447c9d 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -79,9 +79,8 @@ static u32 phys_coresperchip; /* Physical cores per chip */ */ void read_24x7_sys_info(void) { - int call_status, len, ntypes; - - spin_lock(&rtas_data_buf_lock); + const s32 token = rtas_token("ibm,get-system-parameter"); + int call_status; /* * Making system parameter: chips and sockets and cores per chip @@ -91,32 +90,27 @@ void read_24x7_sys_info(void) phys_chipspersocket = 1; phys_coresperchip = 1; - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - PROCESSOR_MODULE_INFO, - __pa(rtas_data_buf), - RTAS_DATA_BUF_SIZE); + do { + spin_lock(&rtas_data_buf_lock); + call_status = rtas_call(token, 3, 1, NULL, PROCESSOR_MODULE_INFO, + __pa(rtas_data_buf), RTAS_DATA_BUF_SIZE); + if (call_status == 0) { + int ntypes = be16_to_cpup((__be16 *)&rtas_data_buf[2]); + int len = be16_to_cpup((__be16 *)&rtas_data_buf[0]); + + if (len >= 8 && ntypes != 0) { + phys_sockets = be16_to_cpup((__be16 *)&rtas_data_buf[4]); + phys_chipspersocket = be16_to_cpup((__be16 *)&rtas_data_buf[6]); + phys_coresperchip = be16_to_cpup((__be16 *)&rtas_data_buf[8]); + } + } + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(call_status)); if (call_status != 0) { pr_err("Error calling get-system-parameter %d\n", call_status); - } else { - len = be16_to_cpup((__be16 *)&rtas_data_buf[0]); - if (len < 8) - goto out; - - ntypes = be16_to_cpup((__be16 *)&rtas_data_buf[2]); - - if (!ntypes) - goto out; - - phys_sockets = be16_to_cpup((__be16 *)&rtas_data_buf[4]); - phys_chipspersocket = be16_to_cpup((__be16 *)&rtas_data_buf[6]); - phys_coresperchip = be16_to_cpup((__be16 *)&rtas_data_buf[8]); } - -out: - spin_unlock(&rtas_data_buf_lock); } /* Domains for which more than one result element are returned for each event. */ -- cgit v1.2.3 From daa8ab59044610aa8ef2ee45a6c157b5e11635e9 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:51 -0600 Subject: powerpc/pseries/lpar: add missing RTAS retry status handling The ibm,get-system-parameter RTAS function may return -2 or 990x, which indicate that the caller should try again. pseries_lpar_read_hblkrm_characteristics() ignores this, making it possible to incorrectly detect TLB block invalidation characteristics at boot. Move the RTAS call into a coventional rtas_busy_delay()-based loop. 
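The shape of that loop, and of the equivalent fixes elsewhere in this series, is roughly the following sketch (the wrapper and its parameter argument are invented for illustration; results must be copied out of rtas_data_buf before the lock is dropped):

	static int example_get_system_parameter(int parameter)
	{
		const s32 token = rtas_token("ibm,get-system-parameter");
		int call_status;

		do {
			spin_lock(&rtas_data_buf_lock);
			memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
			call_status = rtas_call(token, 3, 1, NULL, parameter,
						__pa(rtas_data_buf), RTAS_DATA_BUF_SIZE);
			/* parse/copy rtas_data_buf here, while the lock is still held */
			spin_unlock(&rtas_data_buf_lock);
		} while (rtas_busy_delay(call_status));

		return call_status;
	}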
Signed-off-by: Nathan Lynch Fixes: 1211ee61b4a8 ("powerpc/pseries: Read TLB Block Invalidate Characteristics") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-3-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/lpar.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 97ef6499e501..6597b2126ebb 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1481,22 +1481,22 @@ static inline void __init check_lp_set_hblkrm(unsigned int lp, void __init pseries_lpar_read_hblkrm_characteristics(void) { + const s32 token = rtas_token("ibm,get-system-parameter"); unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH]; int call_status, len, idx, bpsize; if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) return; - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - SPLPAR_TLB_BIC_TOKEN, - __pa(rtas_data_buf), - RTAS_DATA_BUF_SIZE); - memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH); - local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0'; - spin_unlock(&rtas_data_buf_lock); + do { + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); + call_status = rtas_call(token, 3, 1, NULL, SPLPAR_TLB_BIC_TOKEN, + __pa(rtas_data_buf), RTAS_DATA_BUF_SIZE); + memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH); + local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0'; + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(call_status)); if (call_status != 0) { pr_warn("%s %s Error calling get-system-parameter (0x%x)\n", -- cgit v1.2.3 From 5d08633e5f6564b60f1cbe09af3af40a74d66431 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:52 -0600 Subject: powerpc/pseries/lparcfg: add missing RTAS retry status handling The ibm,get-system-parameter RTAS function may return -2 or 990x, which indicate that the caller should try again. lparcfg's parse_system_parameter_string() ignores this, making it possible to intermittently report incorrect SPLPAR characteristics. Move the RTAS call into a coventional rtas_busy_delay()-based loop. 
Signed-off-by: Nathan Lynch Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-4-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/lparcfg.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 63fd925ccbb8..cd33d5800763 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -408,6 +408,7 @@ static void read_lpar_name(struct seq_file *m) */ static void parse_system_parameter_string(struct seq_file *m) { + const s32 token = rtas_token("ibm,get-system-parameter"); int call_status; unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); @@ -417,16 +418,15 @@ static void parse_system_parameter_string(struct seq_file *m) return; } - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - SPLPAR_CHARACTERISTICS_TOKEN, - __pa(rtas_data_buf), - RTAS_DATA_BUF_SIZE); - memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); - local_buffer[SPLPAR_MAXLENGTH - 1] = '\0'; - spin_unlock(&rtas_data_buf_lock); + do { + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); + call_status = rtas_call(token, 3, 1, NULL, SPLPAR_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), RTAS_DATA_BUF_SIZE); + memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); + local_buffer[SPLPAR_MAXLENGTH - 1] = '\0'; + spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(call_status)); if (call_status != 0) { printk(KERN_INFO -- cgit v1.2.3 From b7d5333c48a21fd6a20f54b6887bcc191d21c273 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:53 -0600 Subject: powerpc/pseries/setup: add missing RTAS retry status handling The ibm,get-system-parameter RTAS function may return -2 or 990x, which indicate that the caller should try again. pSeries_cmo_feature_init() ignores this, making it possible to fail to detect cooperative memory overcommit capabilities during boot. Move the RTAS call into a conventional rtas_busy_delay()-based loop, dropping unnecessary clearing of rtas_data_buf. 
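One subtlety in the pSeries_cmo_feature_init() conversion that follows: unlike the other call sites, its loop breaks out with rtas_data_buf_lock still held on success, because the CMO characteristics string is parsed in place out of rtas_data_buf and the unlock happens after parsing, later in the function (outside the quoted hunk). Reduced to its shape, and assuming that later unlock is left as-is:

	do {
		spin_lock(&rtas_data_buf_lock);
		call_status = rtas_call(token, 3, 1, NULL, CMO_CHARACTERISTICS_TOKEN,
					__pa(rtas_data_buf), RTAS_DATA_BUF_SIZE);
		if (call_status == 0)
			break;		/* keep the lock; rtas_data_buf is parsed next */
		spin_unlock(&rtas_data_buf_lock);
	} while (rtas_busy_delay(call_status));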
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-5-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8ef3270515a9..74e50b6b28d4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -941,21 +941,25 @@ void pSeries_coalesce_init(void) */ static void __init pSeries_cmo_feature_init(void) { + const s32 token = rtas_token("ibm,get-system-parameter"); char *ptr, *key, *value, *end; int call_status; int page_order = IOMMU_PAGE_SHIFT_4K; pr_debug(" -> fw_cmo_feature_init()\n"); - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - CMO_CHARACTERISTICS_TOKEN, - __pa(rtas_data_buf), - RTAS_DATA_BUF_SIZE); - if (call_status != 0) { + do { + spin_lock(&rtas_data_buf_lock); + call_status = rtas_call(token, 3, 1, NULL, + CMO_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + if (call_status == 0) + break; spin_unlock(&rtas_data_buf_lock); + } while (rtas_busy_delay(call_status)); + + if (call_status != 0) { pr_debug("CMO not available\n"); pr_debug(" <- fw_cmo_feature_init()\n"); return; -- cgit v1.2.3 From 836b5b9fcc8e09cea7e8a59a070349a00e818308 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:54 -0600 Subject: powerpc/rtas: ensure 4KB alignment for rtas_data_buf Some RTAS functions that have work area parameters impose alignment requirements on the work area passed to them by the OS. Examples include: - ibm,configure-connector - ibm,update-nodes - ibm,update-properties 4KB is the greatest alignment required by PAPR for such buffers. rtas_data_buf used to have a __page_aligned attribute in the arch/ppc64 days, but that was changed to __cacheline_aligned for unknown reasons by commit 033ef338b6e0 ("powerpc: Merge rtas.c into arch/powerpc/kernel"). That works out to 128-byte alignment on ppc64, which isn't right. This was found by inspection and I'm not aware of any real problems caused by this. Either current RTAS implementations don't enforce the alignment constraints, or rtas_data_buf is always being placed at a 4KB boundary by accident (or both, perhaps). Use __aligned(SZ_4K) to ensure the rtas_data_buf has alignment appropriate for all users. 
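For context on the alignment change: as the message above notes, __cacheline_aligned works out to 128-byte alignment on ppc64, whereas the listed work-area functions expect a 4 KB-aligned buffer, which also guarantees the 4 KB rtas_data_buf never straddles a 4 KB boundary. Should one want to double-check a particular build, a runtime assertion along these lines would do (illustrative only, not part of the patch):

	/* Hypothetical sanity check, e.g. early in rtas_initialize(). */
	WARN_ON(!IS_ALIGNED((unsigned long)rtas_data_buf, SZ_4K));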
Signed-off-by: Nathan Lynch Fixes: 033ef338b6e0 ("powerpc: Merge rtas.c into arch/powerpc/kernel") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-6-26929c8cce78@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 86aff1cb8a0d..cce7f69b4ba1 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -75,7 +75,7 @@ static struct rtas_args rtas_args; DEFINE_SPINLOCK(rtas_data_buf_lock); EXPORT_SYMBOL_GPL(rtas_data_buf_lock); -char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; +char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K); EXPORT_SYMBOL_GPL(rtas_data_buf); unsigned long rtas_rmo_buf; -- cgit v1.2.3 From d6f7fe3b25f26213953066ce8109ea47dbd33cfa Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:55 -0600 Subject: powerpc/pseries: drop RTAS-based timebase synchronization The pseries platform has been LPAR-only for several generations, and the PAPR spec: * Guarantees that timebase synchronization is performed by the platform ("The timebase registers are synchronized by the platform before CPUs are given to the OS" - 7.3.8 SMP Support). * Completely omits the RTAS freeze-time-base and thaw-time-base RTAS functions, which are CHRP artifacts. This code is effectively unused on currently supported models, so drop it. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-7-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/smp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index fd2174edfa1d..2bcfee86ff87 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -278,11 +278,5 @@ void __init smp_init_pseries(void) cpumask_clear_cpu(boot_cpuid, of_spin_mask); } - /* Non-lpar has additional take/give timebase */ - if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { - smp_ops->give_timebase = rtas_give_timebase; - smp_ops->take_timebase = rtas_take_timebase; - } - pr_debug(" <- smp_init_pSeries()\n"); } -- cgit v1.2.3 From 8252b88294d2a744df6e3c6d85909ade403a5f2c Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:56 -0600 Subject: powerpc/rtas: improve function information lookups The core RTAS support code and its clients perform two types of lookup for RTAS firmware function information. First, mapping a known function name to a token. The typical use case invokes rtas_token() to retrieve the token value to pass to rtas_call(). rtas_token() relies on of_get_property(), which performs a linear search of the /rtas node's property list under a lock with IRQs disabled. Second, and less common: given a token value, looking up some information about the function. The primary example is the sys_rtas filter path, which linearly scans a small table to match the token to a rtas_filter struct. Another use case to come is RTAS entry/exit tracepoints, which will require efficient lookup of function names from token values. Currently there is no general API for this. We need something much like the existing rtas_filters table, but more general and organized to facilitate efficient lookups. Introduce: * A new rtas_function type, aggregating function name, token, and filter. Other function characteristics could be added in the future. 
* An array of rtas_function, where each element corresponds to a known RTAS function. All information in the table is static save the token values, which are derived from the device tree at boot. The array is sorted by function name to allow binary search. * A named constant for each known RTAS function, used to index the function array. These also will be used in a client-facing API to be added later. * An xarray that maps valid tokens to rtas_function objects. Fold the existing rtas_filter table into the new rtas_function array, with the appropriate adjustments to block_rtas_call(). Remove now-redundant fields from struct rtas_filter. Preserve the function of the CONFIG_CPU_BIG_ENDIAN guard in the current filter table by introducing a per-function flag that is set for the function entries related to pseries LPAR migration. These have never had working users via sys_rtas on ppc64le; see commit de0f7349a0dd ("powerpc/rtas: prevent suspend-related sys_rtas use on LE"). Convert rtas_token() to use a lockless binary search on the function table. Fall back to the old behavior for lookups against names that are not known to be RTAS functions, but issue a warning. rtas_token() is for function names; it is not a general facility for accessing arbitrary properties of the /rtas node. All known misuses of rtas_token() have been converted to more appropriate of_ APIs in preceding changes. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-8-26929c8cce78@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 85 +++++ arch/powerpc/kernel/rtas.c | 721 ++++++++++++++++++++++++++++++++++------ 2 files changed, 700 insertions(+), 106 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 479a95cb2770..f03891891a2d 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -16,6 +16,91 @@ * Copyright (C) 2001 PPC 64 Team, IBM Corp */ +enum rtas_function_index { + RTAS_FNIDX__CHECK_EXCEPTION, + RTAS_FNIDX__DISPLAY_CHARACTER, + RTAS_FNIDX__EVENT_SCAN, + RTAS_FNIDX__FREEZE_TIME_BASE, + RTAS_FNIDX__GET_POWER_LEVEL, + RTAS_FNIDX__GET_SENSOR_STATE, + RTAS_FNIDX__GET_TERM_CHAR, + RTAS_FNIDX__GET_TIME_OF_DAY, + RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE, + RTAS_FNIDX__IBM_CBE_START_PTCAL, + RTAS_FNIDX__IBM_CBE_STOP_PTCAL, + RTAS_FNIDX__IBM_CHANGE_MSI, + RTAS_FNIDX__IBM_CLOSE_ERRINJCT, + RTAS_FNIDX__IBM_CONFIGURE_BRIDGE, + RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR, + RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP, + RTAS_FNIDX__IBM_CONFIGURE_PE, + RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW, + RTAS_FNIDX__IBM_DISPLAY_MESSAGE, + RTAS_FNIDX__IBM_ERRINJCT, + RTAS_FNIDX__IBM_EXTI2C, + RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO, + RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2, + RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE, + RTAS_FNIDX__IBM_GET_INDICES, + RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY, + RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER, + RTAS_FNIDX__IBM_GET_VPD, + RTAS_FNIDX__IBM_GET_XIVE, + RTAS_FNIDX__IBM_INT_OFF, + RTAS_FNIDX__IBM_INT_ON, + RTAS_FNIDX__IBM_IO_QUIESCE_ACK, + RTAS_FNIDX__IBM_LPAR_PERFTOOLS, + RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE, + RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION, + RTAS_FNIDX__IBM_NMI_INTERLOCK, + RTAS_FNIDX__IBM_NMI_REGISTER, + RTAS_FNIDX__IBM_OPEN_ERRINJCT, + RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE, + RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER, + RTAS_FNIDX__IBM_OS_TERM, + RTAS_FNIDX__IBM_PARTNER_CONTROL, + RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION, + RTAS_FNIDX__IBM_PLATFORM_DUMP, + 
RTAS_FNIDX__IBM_POWER_OFF_UPS, + RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER, + RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW, + RTAS_FNIDX__IBM_READ_PCI_CONFIG, + RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE, + RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2, + RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW, + RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS, + RTAS_FNIDX__IBM_SCAN_LOG_DUMP, + RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR, + RTAS_FNIDX__IBM_SET_EEH_OPTION, + RTAS_FNIDX__IBM_SET_SLOT_RESET, + RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER, + RTAS_FNIDX__IBM_SET_XIVE, + RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL, + RTAS_FNIDX__IBM_SUSPEND_ME, + RTAS_FNIDX__IBM_TUNE_DMA_PARMS, + RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT, + RTAS_FNIDX__IBM_UPDATE_NODES, + RTAS_FNIDX__IBM_UPDATE_PROPERTIES, + RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE, + RTAS_FNIDX__IBM_WRITE_PCI_CONFIG, + RTAS_FNIDX__NVRAM_FETCH, + RTAS_FNIDX__NVRAM_STORE, + RTAS_FNIDX__POWER_OFF, + RTAS_FNIDX__PUT_TERM_CHAR, + RTAS_FNIDX__QUERY_CPU_STOPPED_STATE, + RTAS_FNIDX__READ_PCI_CONFIG, + RTAS_FNIDX__RTAS_LAST_ERROR, + RTAS_FNIDX__SET_INDICATOR, + RTAS_FNIDX__SET_POWER_LEVEL, + RTAS_FNIDX__SET_TIME_FOR_POWER_ON, + RTAS_FNIDX__SET_TIME_OF_DAY, + RTAS_FNIDX__START_CPU, + RTAS_FNIDX__STOP_SELF, + RTAS_FNIDX__SYSTEM_REBOOT, + RTAS_FNIDX__THAW_TIME_BASE, + RTAS_FNIDX__WRITE_PCI_CONFIG, +}; + #define RTAS_UNKNOWN_SERVICE (-1) #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index cce7f69b4ba1..3c7151f4f2ba 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -9,10 +9,12 @@ #define pr_fmt(fmt) "rtas: " fmt +#include #include #include #include #include +#include #include #include #include @@ -26,6 +28,7 @@ #include #include #include +#include #include #include @@ -37,6 +40,486 @@ #include #include +struct rtas_filter { + /* Indexes into the args buffer, -1 if not used */ + const int buf_idx1; + const int size_idx1; + const int buf_idx2; + const int size_idx2; + /* + * Assumed buffer size per the spec if the function does not + * have a size parameter, e.g. ibm,errinjct. 0 if unused. + */ + const int fixed_size; +}; + +/** + * struct rtas_function - Descriptor for RTAS functions. + * + * @token: Value of @name if it exists under the /rtas node. + * @name: Function name. + * @filter: If non-NULL, invoking this function via the rtas syscall is + * generally allowed, and @filter describes constraints on the + * arguments. See also @banned_for_syscall_on_le. + * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed + * but specifically restricted on ppc64le. Such + * functions are believed to have no users on + * ppc64le, and we want to keep it that way. It does + * not make sense for this to be set when @filter + * is false. 
+ */ +struct rtas_function { + s32 token; + const bool banned_for_syscall_on_le:1; + const char * const name; + const struct rtas_filter *filter; +}; + +static struct rtas_function rtas_function_table[] __ro_after_init = { + [RTAS_FNIDX__CHECK_EXCEPTION] = { + .name = "check-exception", + }, + [RTAS_FNIDX__DISPLAY_CHARACTER] = { + .name = "display-character", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__EVENT_SCAN] = { + .name = "event-scan", + }, + [RTAS_FNIDX__FREEZE_TIME_BASE] = { + .name = "freeze-time-base", + }, + [RTAS_FNIDX__GET_POWER_LEVEL] = { + .name = "get-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_SENSOR_STATE] = { + .name = "get-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_TERM_CHAR] = { + .name = "get-term-char", + }, + [RTAS_FNIDX__GET_TIME_OF_DAY] = { + .name = "get-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = { + .name = "ibm,activate-firmware", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { + .name = "ibm,cbe-start-ptcal", + }, + [RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = { + .name = "ibm,cbe-stop-ptcal", + }, + [RTAS_FNIDX__IBM_CHANGE_MSI] = { + .name = "ibm,change-msi", + }, + [RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = { + .name = "ibm,close-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = { + .name = "ibm,configure-bridge", + }, + [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = { + .name = "ibm,configure-connector", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = { + .name = "ibm,configure-kernel-dump", + }, + [RTAS_FNIDX__IBM_CONFIGURE_PE] = { + .name = "ibm,configure-pe", + }, + [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = { + .name = "ibm,create-pe-dma-window", + }, + [RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = { + .name = "ibm,display-message", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ERRINJCT] = { + .name = "ibm,errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 1024, + }, + }, + [RTAS_FNIDX__IBM_EXTI2C] = { + .name = "ibm,exti2c", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = { + .name = "ibm,get-config-addr-info", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = { + .name = "ibm,get-config-addr-info2", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = { + .name = "ibm,get-dynamic-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_INDICES] = { + .name = "ibm,get-indices", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, 
+ }, + [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { + .name = "ibm,get-rio-topology", + }, + [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = { + .name = "ibm,get-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = 2, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_VPD] = { + .name = "ibm,get-vpd", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = 2, + }, + }, + [RTAS_FNIDX__IBM_GET_XIVE] = { + .name = "ibm,get-xive", + }, + [RTAS_FNIDX__IBM_INT_OFF] = { + .name = "ibm,int-off", + }, + [RTAS_FNIDX__IBM_INT_ON] = { + .name = "ibm,int-on", + }, + [RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = { + .name = "ibm,io-quiesce-ack", + }, + [RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = { + .name = "ibm,lpar-perftools", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { + .name = "ibm,manage-flash-image", + }, + [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = { + .name = "ibm,manage-storage-preservation", + }, + [RTAS_FNIDX__IBM_NMI_INTERLOCK] = { + .name = "ibm,nmi-interlock", + }, + [RTAS_FNIDX__IBM_NMI_REGISTER] = { + .name = "ibm,nmi-register", + }, + [RTAS_FNIDX__IBM_OPEN_ERRINJCT] = { + .name = "ibm,open-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = { + .name = "ibm,open-sriov-allow-unfreeze", + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = { + .name = "ibm,open-sriov-map-pe-number", + }, + [RTAS_FNIDX__IBM_OS_TERM] = { + .name = "ibm,os-term", + }, + [RTAS_FNIDX__IBM_PARTNER_CONTROL] = { + .name = "ibm,partner-control", + }, + [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = { + .name = "ibm,physical-attestation", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { + .name = "ibm,platform-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 4, .size_idx1 = 5, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { + .name = "ibm,power-off-ups", + }, + [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = { + .name = "ibm,query-interrupt-source-number", + }, + [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = { + .name = "ibm,query-pe-dma-window", + }, + [RTAS_FNIDX__IBM_READ_PCI_CONFIG] = { + .name = "ibm,read-pci-config", + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = { + .name = "ibm,read-slot-reset-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = { + .name = "ibm,read-slot-reset-state2", + }, + [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { + .name = "ibm,remove-pe-dma-window", + }, + [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = { + .name = "ibm,reset-pe-dma-windows", + }, + [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { + .name = "ibm,scan-log-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = { + .name = "ibm,set-dynamic-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { + .name = "ibm,set-eeh-option", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, 
.size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_SLOT_RESET] = { + .name = "ibm,set-slot-reset", + }, + [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = { + .name = "ibm,set-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_XIVE] = { + .name = "ibm,set-xive", + }, + [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = { + .name = "ibm,slot-error-detail", + }, + [RTAS_FNIDX__IBM_SUSPEND_ME] = { + .name = "ibm,suspend-me", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = { + .name = "ibm,tune-dma-parms", + }, + [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = { + .name = "ibm,update-flash-64-and-reboot", + }, + [RTAS_FNIDX__IBM_UPDATE_NODES] = { + .name = "ibm,update-nodes", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = { + .name = "ibm,update-properties", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = { + .name = "ibm,validate-flash-image", + }, + [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = { + .name = "ibm,write-pci-config", + }, + [RTAS_FNIDX__NVRAM_FETCH] = { + .name = "nvram-fetch", + }, + [RTAS_FNIDX__NVRAM_STORE] = { + .name = "nvram-store", + }, + [RTAS_FNIDX__POWER_OFF] = { + .name = "power-off", + }, + [RTAS_FNIDX__PUT_TERM_CHAR] = { + .name = "put-term-char", + }, + [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = { + .name = "query-cpu-stopped-state", + }, + [RTAS_FNIDX__READ_PCI_CONFIG] = { + .name = "read-pci-config", + }, + [RTAS_FNIDX__RTAS_LAST_ERROR] = { + .name = "rtas-last-error", + }, + [RTAS_FNIDX__SET_INDICATOR] = { + .name = "set-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_POWER_LEVEL] = { + .name = "set-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = { + .name = "set-time-for-power-on", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_OF_DAY] = { + .name = "set-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__START_CPU] = { + .name = "start-cpu", + }, + [RTAS_FNIDX__STOP_SELF] = { + .name = "stop-self", + }, + [RTAS_FNIDX__SYSTEM_REBOOT] = { + .name = "system-reboot", + }, + [RTAS_FNIDX__THAW_TIME_BASE] = { + .name = "thaw-time-base", + }, + [RTAS_FNIDX__WRITE_PCI_CONFIG] = { + .name = "write-pci-config", + }, +}; + +static int rtas_function_cmp(const void *a, const void *b) +{ + const struct rtas_function *f1 = a; + const struct rtas_function *f2 = b; + + return strcmp(f1->name, f2->name); +} + +/* + * Boot-time initialization of the function table needs the lookup to + * return a non-const-qualified object. Use rtas_name_to_function() + * in all other contexts. 
+ */ +static struct rtas_function *__rtas_name_to_function(const char *name) +{ + const struct rtas_function key = { + .name = name, + }; + struct rtas_function *found; + + found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table), + sizeof(rtas_function_table[0]), rtas_function_cmp); + + return found; +} + +static const struct rtas_function *rtas_name_to_function(const char *name) +{ + return __rtas_name_to_function(name); +} + +static DEFINE_XARRAY(rtas_token_to_function_xarray); + +static int __init rtas_token_to_function_xarray_init(void) +{ + int err = 0; + + for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { + const struct rtas_function *func = &rtas_function_table[i]; + const s32 token = func->token; + + if (token == RTAS_UNKNOWN_SERVICE) + continue; + + err = xa_err(xa_store(&rtas_token_to_function_xarray, + token, (void *)func, GFP_KERNEL)); + if (err) + break; + } + + return err; +} +arch_initcall(rtas_token_to_function_xarray_init); + +static const struct rtas_function *rtas_token_to_function(s32 token) +{ + const struct rtas_function *func; + + if (WARN_ONCE(token < 0, "invalid token %d", token)) + return NULL; + + func = xa_load(&rtas_token_to_function_xarray, token); + + if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token)) + return NULL; + + return func; +} + /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); @@ -315,9 +798,25 @@ EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { + const struct rtas_function *func; const __be32 *tokp; + if (rtas.dev == NULL) return RTAS_UNKNOWN_SERVICE; + + func = rtas_name_to_function(service); + if (func) + return func->token; + /* + * The caller is looking up a name that is not known to be an + * RTAS function. Either it's a function that needs to be + * added to the table, or they're misusing rtas_token() to + * access non-function properties of the /rtas node. Warn and + * fall back to the legacy behavior. + */ + WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n", + service); + tokp = of_get_property(rtas.dev, service, NULL); return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE; } @@ -1090,56 +1589,12 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log * * Accordingly, we filter RTAS requests to check that the call is * permitted, and that provided pointers fall within the RMO buffer. - * The rtas_filters list contains an entry for each permitted call, - * with the indexes of the parameters which are expected to contain - * addresses and sizes of buffers allocated inside the RMO buffer. + * If a function is allowed to be invoked via the syscall, then its + * entry in the rtas_functions table points to a rtas_filter that + * describes its constraints, with the indexes of the parameters which + * are expected to contain addresses and sizes of buffers allocated + * inside the RMO buffer. 
*/ -struct rtas_filter { - const char *name; - int token; - /* Indexes into the args buffer, -1 if not used */ - int buf_idx1; - int size_idx1; - int buf_idx2; - int size_idx2; - - int fixed_size; -}; - -static struct rtas_filter rtas_filters[] __ro_after_init = { - { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, - { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ - { "display-character", -1, -1, -1, -1, -1 }, - { "ibm,display-message", -1, 0, -1, -1, -1 }, - { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, - { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, - { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, - { "ibm,get-indices", -1, 2, 3, -1, -1 }, - { "get-power-level", -1, -1, -1, -1, -1 }, - { "get-sensor-state", -1, -1, -1, -1, -1 }, - { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, - { "get-time-of-day", -1, -1, -1, -1, -1 }, - { "ibm,get-vpd", -1, 0, -1, 1, 2 }, - { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ - { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, - { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, - { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, - { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, - { "set-indicator", -1, -1, -1, -1, -1 }, - { "set-power-level", -1, -1, -1, -1, -1 }, - { "set-time-for-power-on", -1, -1, -1, -1, -1 }, - { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, - { "set-time-of-day", -1, -1, -1, -1, -1 }, -#ifdef CONFIG_CPU_BIG_ENDIAN - { "ibm,suspend-me", -1, -1, -1, -1, -1 }, - { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, - { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, -#endif - { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, -}; static bool in_rmo_buf(u32 base, u32 end) { @@ -1153,63 +1608,75 @@ static bool in_rmo_buf(u32 base, u32 end) static bool block_rtas_call(int token, int nargs, struct rtas_args *args) { - int i; + const struct rtas_function *func; + const struct rtas_filter *f; + u32 base, size, end; - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { - struct rtas_filter *f = &rtas_filters[i]; - u32 base, size, end; - - if (token != f->token) - continue; - - if (f->buf_idx1 != -1) { - base = be32_to_cpu(args->args[f->buf_idx1]); - if (f->size_idx1 != -1) - size = be32_to_cpu(args->args[f->size_idx1]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - - end = base + size - 1; + /* + * If this token doesn't correspond to a function the kernel + * understands, you're not allowed to call it. + */ + func = rtas_token_to_function(token); + if (!func) + goto err; + /* + * And only functions with filters attached are allowed. + */ + f = func->filter; + if (!f) + goto err; + /* + * And some functions aren't allowed on LE. 
+ */ + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le) + goto err; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; - /* - * Special case for ibm,platform-dump - NULL buffer - * address is used to indicate end of dump processing - */ - if (!strcmp(f->name, "ibm,platform-dump") && - base == 0) - return false; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (!strcmp(func->name, "ibm,platform-dump") && + base == 0) + return false; - if (f->buf_idx2 != -1) { - base = be32_to_cpu(args->args[f->buf_idx2]); - if (f->size_idx2 != -1) - size = be32_to_cpu(args->args[f->size_idx2]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } - /* - * Special case for ibm,configure-connector where the - * address can be 0 - */ - if (!strcmp(f->name, "ibm,configure-connector") && - base == 0) - return false; + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(func->name, "ibm,configure-connector") && + base == 0) + return false; - return false; + if (!in_rmo_buf(base, end)) + goto err; } + return false; err: pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", @@ -1217,14 +1684,6 @@ err: return true; } -static void __init rtas_syscall_filter_init(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) - rtas_filters[i].token = rtas_token(rtas_filters[i].name); -} - /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1324,6 +1783,54 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return 0; } +static void __init rtas_function_table_init(void) +{ + struct property *prop; + + for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { + struct rtas_function *curr = &rtas_function_table[i]; + struct rtas_function *prior; + int cmp; + + curr->token = RTAS_UNKNOWN_SERVICE; + + if (i == 0) + continue; + /* + * Ensure table is sorted correctly for binary search + * on function names. 
+ */ + prior = &rtas_function_table[i - 1]; + + cmp = strcmp(prior->name, curr->name); + if (cmp < 0) + continue; + + if (cmp == 0) { + pr_err("'%s' has duplicate function table entries\n", + curr->name); + } else { + pr_err("function table unsorted: '%s' wrongly precedes '%s'\n", + prior->name, curr->name); + } + } + + for_each_property_of_node(rtas.dev, prop) { + struct rtas_function *func; + + if (prop->length != sizeof(u32)) + continue; + + func = __rtas_name_to_function(prop->name); + if (!func) + continue; + + func->token = be32_to_cpup((__be32 *)prop->value); + + pr_debug("function %s has token %u\n", func->name, func->token); + } +} + /* * Call early during boot, before mem init, to retrieve the RTAS * information from the device-tree and allocate the RMO buffer for userland @@ -1357,6 +1864,9 @@ void __init rtas_initialize(void) init_error_log_max(); + /* Must be called before any function token lookups */ + rtas_function_table_init(); + /* * Discover these now to avoid device tree lookups in the * panic path. @@ -1382,7 +1892,6 @@ void __init rtas_initialize(void) #endif ibm_open_errinjct_token = rtas_token("ibm,open-errinjct"); ibm_errinjct_token = rtas_token("ibm,errinjct"); - rtas_syscall_filter_init(); } int __init early_init_dt_scan_rtas(unsigned long node, -- cgit v1.2.3 From 77f85f69a97ac5f24537261a893436926c3e0cdc Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:57 -0600 Subject: powerpc/rtas: strengthen do_enter_rtas() type safety, drop inline Make do_enter_rtas() take a pointer to struct rtas_args and do the __pa() conversion in one place instead of leaving it to callers. This also makes it possible to introduce enter/exit tracepoints that access the rtas_args struct fields. There's no apparent reason to force inlining of do_enter_rtas() either, and it seems to bloat the code a bit. Let the compiler decide. Signed-off-by: Nathan Lynch Reviewed-by: Andrew Donnellan Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-9-26929c8cce78@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 3c7151f4f2ba..d2622b997b68 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -523,7 +523,7 @@ static const struct rtas_function *rtas_token_to_function(s32 token) /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); -static inline void do_enter_rtas(unsigned long args) +static void do_enter_rtas(struct rtas_args *args) { unsigned long msr; @@ -538,7 +538,7 @@ static inline void do_enter_rtas(unsigned long args) hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */ - enter_rtas(args); + enter_rtas(__pa(args)); srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ } @@ -892,7 +892,7 @@ static char *__fetch_rtas_last_error(char *altbuf) save_args = rtas_args; rtas_args = err_args; - do_enter_rtas(__pa(&rtas_args)); + do_enter_rtas(&rtas_args); err_args = rtas_args; rtas_args = save_args; @@ -939,7 +939,7 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, for (i = 0; i < nret; ++i) args->rets[i] = 0; - do_enter_rtas(__pa(args)); + do_enter_rtas(args); } void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) 
@@ -1757,7 +1757,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) raw_spin_lock_irqsave(&rtas_lock, flags); rtas_args = args; - do_enter_rtas(__pa(&rtas_args)); + do_enter_rtas(&rtas_args); args = rtas_args; /* A -1 return code indicates that the last command couldn't -- cgit v1.2.3 From 2c81ca7fbaea06c2aed1aec66a88208d67e1e2de Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:58 -0600 Subject: powerpc/tracing: tracepoints for RTAS entry and exit Add two sets of tracepoints to be used around RTAS entry: * rtas_input/rtas_output, which emit the function name, its inputs, the returned status, and any other outputs. These produce an API-level record of OS<->RTAS activity. * rtas_ll_entry/rtas_ll_exit, which are lower-level and emit the entire contents of the parameter block (aka rtas_args) on entry and exit. Likely useful only for debugging. With uses of these tracepoints in do_enter_rtas() to be added in the following patch, examples of get-time-of-day and event-scan functions as rendered by trace-cmd (with some multi-line formatting manually imposed on the rtas_ll_* entries to avoid extremely long lines in the commit message): cat-36800 [059] 4978.518303: rtas_input: get-time-of-day arguments: cat-36800 [059] 4978.518306: rtas_ll_entry: token=3 nargs=0 nret=8 params: [0]=0x00000000 [1]=0x00000000 [2]=0x00000000 [3]=0x00000000 [4]=0x00000000 [5]=0x00000000 [6]=0x00000000 [7]=0x00000000 [8]=0x00000000 [9]=0x00000000 [10]=0x00000000 [11]=0x00000000 [12]=0x00000000 [13]=0x00000000 [14]=0x00000000 [15]=0x00000000 cat-36800 [059] 4978.518366: rtas_ll_exit: token=3 nargs=0 nret=8 params: [0]=0x00000000 [1]=0x000007e6 [2]=0x0000000b [3]=0x00000001 [4]=0x00000000 [5]=0x0000000e [6]=0x00000008 [7]=0x2e0dac40 [8]=0x00000000 [9]=0x00000000 [10]=0x00000000 [11]=0x00000000 [12]=0x00000000 [13]=0x00000000 [14]=0x00000000 [15]=0x00000000 cat-36800 [059] 4978.518366: rtas_output: get-time-of-day status: 0, other outputs: 2022 11 1 0 14 8 772648000 kworker/39:1-336 [039] 4982.731623: rtas_input: event-scan arguments: 4294967295 0 80484920 2048 kworker/39:1-336 [039] 4982.731626: rtas_ll_entry: token=6 nargs=4 nret=1 params: [0]=0xffffffff [1]=0x00000000 [2]=0x04cc1a38 [3]=0x00000800 [4]=0x00000000 [5]=0x0000000e [6]=0x00000008 [7]=0x2e0dac40 [8]=0x00000000 [9]=0x00000000 [10]=0x00000000 [11]=0x00000000 [12]=0x00000000 [13]=0x00000000 [14]=0x00000000 [15]=0x00000000 kworker/39:1-336 [039] 4982.731676: rtas_ll_exit: token=6 nargs=4 nret=1 params: [0]=0xffffffff [1]=0x00000000 [2]=0x04cc1a38 [3]=0x00000800 [4]=0x00000001 [5]=0x0000000e [6]=0x00000008 [7]=0x2e0dac40 [8]=0x00000000 [9]=0x00000000 [10]=0x00000000 [11]=0x00000000 [12]=0x00000000 [13]=0x00000000 [14]=0x00000000 [15]=0x00000000 kworker/39:1-336 [039] 4982.731677: rtas_output: event-scan status: 1, other outputs: Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-10-26929c8cce78@linux.ibm.com --- arch/powerpc/include/asm/trace.h | 103 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 08cd60cd70b7..82cc2c6704e6 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -119,6 +119,109 @@ TRACE_EVENT_FN_COND(hcall_exit, ); #endif +#ifdef CONFIG_PPC_RTAS + +#include + +TRACE_EVENT(rtas_input, + + TP_PROTO(struct rtas_args *rtas_args, const char *name), + + TP_ARGS(rtas_args, name), + + TP_STRUCT__entry( + 
__field(__u32, nargs) + __string(name, name) + __dynamic_array(__u32, inputs, be32_to_cpu(rtas_args->nargs)) + ), + + TP_fast_assign( + __entry->nargs = be32_to_cpu(rtas_args->nargs); + __assign_str(name, name); + be32_to_cpu_array(__get_dynamic_array(inputs), rtas_args->args, __entry->nargs); + ), + + TP_printk("%s arguments: %s", __get_str(name), + __print_array(__get_dynamic_array(inputs), __entry->nargs, 4) + ) +); + +TRACE_EVENT(rtas_output, + + TP_PROTO(struct rtas_args *rtas_args, const char *name), + + TP_ARGS(rtas_args, name), + + TP_STRUCT__entry( + __field(__u32, nr_other) + __field(__s32, status) + __string(name, name) + __dynamic_array(__u32, other_outputs, be32_to_cpu(rtas_args->nret) - 1) + ), + + TP_fast_assign( + __entry->nr_other = be32_to_cpu(rtas_args->nret) - 1; + __entry->status = be32_to_cpu(rtas_args->rets[0]); + __assign_str(name, name); + be32_to_cpu_array(__get_dynamic_array(other_outputs), + &rtas_args->rets[1], __entry->nr_other); + ), + + TP_printk("%s status: %d, other outputs: %s", __get_str(name), __entry->status, + __print_array(__get_dynamic_array(other_outputs), + __entry->nr_other, 4) + ) +); + +DECLARE_EVENT_CLASS(rtas_parameter_block, + + TP_PROTO(struct rtas_args *rtas_args), + + TP_ARGS(rtas_args), + + TP_STRUCT__entry( + __field(u32, token) + __field(u32, nargs) + __field(u32, nret) + __array(__u32, params, 16) + ), + + TP_fast_assign( + __entry->token = be32_to_cpu(rtas_args->token); + __entry->nargs = be32_to_cpu(rtas_args->nargs); + __entry->nret = be32_to_cpu(rtas_args->nret); + be32_to_cpu_array(__entry->params, rtas_args->args, ARRAY_SIZE(rtas_args->args)); + ), + + TP_printk("token=%u nargs=%u nret=%u params:" + " [0]=0x%08x [1]=0x%08x [2]=0x%08x [3]=0x%08x" + " [4]=0x%08x [5]=0x%08x [6]=0x%08x [7]=0x%08x" + " [8]=0x%08x [9]=0x%08x [10]=0x%08x [11]=0x%08x" + " [12]=0x%08x [13]=0x%08x [14]=0x%08x [15]=0x%08x", + __entry->token, __entry->nargs, __entry->nret, + __entry->params[0], __entry->params[1], __entry->params[2], __entry->params[3], + __entry->params[4], __entry->params[5], __entry->params[6], __entry->params[7], + __entry->params[8], __entry->params[9], __entry->params[10], __entry->params[11], + __entry->params[12], __entry->params[13], __entry->params[14], __entry->params[15] + ) +); + +DEFINE_EVENT(rtas_parameter_block, rtas_ll_entry, + + TP_PROTO(struct rtas_args *rtas_args), + + TP_ARGS(rtas_args) +); + +DEFINE_EVENT(rtas_parameter_block, rtas_ll_exit, + + TP_PROTO(struct rtas_args *rtas_args), + + TP_ARGS(rtas_args) +); + +#endif /* CONFIG_PPC_RTAS */ + #ifdef CONFIG_PPC_POWERNV extern int opal_tracepoint_regfunc(void); extern void opal_tracepoint_unregfunc(void); -- cgit v1.2.3 From 24098f580e2b5ceb2cec4f02833e0a0bb5d46d2e Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:41:59 -0600 Subject: powerpc/rtas: add tracepoints around RTAS entry Decompose the RTAS entry C code into tracing and non-tracing variants, calling the just-added tracepoints in the tracing-enabled path. Skip tracing in contexts known to be unsafe (real mode, CPU offline). 
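As a usage note (assuming these events are registered under the existing "powerpc" trace system defined in asm/trace.h, and that tracefs is mounted in the usual place), the new tracepoints can be enabled like any others once the hookup in the next patch is in place, for example:

	trace-cmd record -e powerpc:rtas_input -e powerpc:rtas_output

or:

	echo 1 > /sys/kernel/tracing/events/powerpc/rtas_ll_entry/enable
	echo 1 > /sys/kernel/tracing/events/powerpc/rtas_ll_exit/enable

The rtas_input/rtas_output pair gives the API-level view shown in the preceding commit message; the rtas_ll_* events dump the raw parameter block and are mainly useful for debugging.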
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-11-26929c8cce78@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 59 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d2622b997b68..a6d0c46b4512 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -38,6 +38,7 @@ #include #include #include +#include #include struct rtas_filter { @@ -523,24 +524,70 @@ static const struct rtas_function *rtas_token_to_function(s32 token) /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); -static void do_enter_rtas(struct rtas_args *args) +static void __do_enter_rtas(struct rtas_args *args) +{ + enter_rtas(__pa(args)); + srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ +} + +static void __do_enter_rtas_trace(struct rtas_args *args) { - unsigned long msr; + const char *name = NULL; + /* + * If the tracepoints that consume the function name aren't + * active, avoid the lookup. + */ + if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) { + const s32 token = be32_to_cpu(args->token); + const struct rtas_function *func = rtas_token_to_function(token); + + name = func->name; + } + + trace_rtas_input(args, name); + trace_rtas_ll_entry(args); + + __do_enter_rtas(args); + trace_rtas_ll_exit(args); + trace_rtas_output(args, name); +} + +static void do_enter_rtas(struct rtas_args *args) +{ + const unsigned long msr = mfmsr(); + /* + * Situations where we want to skip any active tracepoints for + * safety reasons: + * + * 1. The last code executed on an offline CPU as it stops, + * i.e. we're about to call stop-self. The tracepoints' + * function name lookup uses xarray, which uses RCU, which + * isn't valid to call on an offline CPU. Any events + * emitted on an offline CPU will be discarded anyway. + * + * 2. In real mode, as when invoking ibm,nmi-interlock from + * the pseries MCE handler. We cannot count on trace + * buffers or the entries in rtas_token_to_function_xarray + * to be contained in the RMO. + */ + const unsigned long mask = MSR_IR | MSR_DR; + const bool can_trace = likely(cpu_online(raw_smp_processor_id()) && + (msr & mask) == mask); /* * Make sure MSR[RI] is currently enabled as it will be forced later * in enter_rtas. */ - msr = mfmsr(); BUG_ON(!(msr & MSR_RI)); BUG_ON(!irqs_disabled()); hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */ - enter_rtas(__pa(args)); - - srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ + if (can_trace) + __do_enter_rtas_trace(args); + else + __do_enter_rtas(args); } struct rtas_t rtas; -- cgit v1.2.3 From 43033bc62d349d8d852855a336c91d046de819bd Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:00 -0600 Subject: powerpc/pseries: add RTAS work area allocator Various pseries-specific RTAS functions take a temporary "work area" parameter - a buffer in memory accessible to RTAS. Typically such functions are passed the statically allocated rtas_data_buf buffer as the argument. This buffer is protected by a global spinlock. So users of rtas_data_buf cannot perform sleeping operations while accessing the buffer. Most RTAS functions that have a work area parameter can return a status (-2/990x) that indicates that the caller should retry. Before retrying, the caller may need to reschedule or sleep (see rtas_busy_delay() for details). 
This combination of factors leads to uncomfortable constructions like this: do { spin_lock(&rtas_data_buf_lock); rc = rtas_call(token, __pa(rtas_data_buf, ...); if (rc == 0) { /* parse or copy out rtas_data_buf contents */ } spin_unlock(&rtas_data_buf_lock); } while (rtas_busy_delay(rc)); Another unfortunately common way of handling this is for callers to blithely ignore the possibility of a -2/990x status and hope for the best. If users were allowed to perform blocking operations while owning a work area, the programming model would become less tedious and error-prone. Users could schedule away, sleep, or perform other blocking operations without having to release and re-acquire resources. We could continue to use a single work area buffer, and convert rtas_data_buf_lock to a mutex. But that would impose an unnecessarily coarse serialization on all users. As awkward as the current design is, it prevents longer running operations that need to repeatedly use rtas_data_buf from blocking the progress of others. There are more considerations. One is that while 4KB is fine for all current in-kernel uses, some RTAS calls can take much smaller buffers, and some (VPD, platform dumps) would likely benefit from larger ones. Another is that at least one RTAS function (ibm,get-vpd) has *two* work area parameters. And finally, we should expect the number of work area users in the kernel to increase over time as we introduce lockdown-compatible ABIs to replace less safe use cases based on sys_rtas/librtas. So a special-purpose allocator for RTAS work area buffers seems worth trying. Properties: * The backing memory for the allocator is reserved early in boot in order to satisfy RTAS addressing requirements, and then managed with genalloc. * Allocations can block, but they never fail (mempool-like). * Prioritizes first-come, first-serve fairness over throughput. * Early boot allocations before the allocator has been initialized are served via an internal static buffer. Intended to replace rtas_data_buf. New code that needs RTAS work area buffers should prefer this API. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-12-26929c8cce78@linux.ibm.com --- arch/powerpc/include/asm/rtas-work-area.h | 96 +++++++++++ arch/powerpc/kernel/rtas.c | 3 + arch/powerpc/platforms/pseries/Makefile | 2 +- arch/powerpc/platforms/pseries/rtas-work-area.c | 209 ++++++++++++++++++++++++ 4 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/rtas-work-area.h create mode 100644 arch/powerpc/platforms/pseries/rtas-work-area.c diff --git a/arch/powerpc/include/asm/rtas-work-area.h b/arch/powerpc/include/asm/rtas-work-area.h new file mode 100644 index 000000000000..251a395dbd2e --- /dev/null +++ b/arch/powerpc/include/asm/rtas-work-area.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_RTAS_WORK_AREA_H +#define _ASM_POWERPC_RTAS_WORK_AREA_H + +#include +#include +#include + +#include + +/** + * struct rtas_work_area - RTAS work area descriptor. + * + * Descriptor for a "work area" in PAPR terminology that satisfies + * RTAS addressing requirements. + */ +struct rtas_work_area { + /* private: Use the APIs provided below. */ + char *buf; + size_t size; +}; + +enum { + /* Maximum allocation size, enforced at build time. */ + RTAS_WORK_AREA_MAX_ALLOC_SZ = SZ_128K, +}; + +/** + * rtas_work_area_alloc() - Acquire a work area of the requested size. + * @size_: Allocation size. 
Must be compile-time constant and not more + * than %RTAS_WORK_AREA_MAX_ALLOC_SZ. + * + * Allocate a buffer suitable for passing to RTAS functions that have + * a memory address parameter, often (but not always) referred to as a + * "work area" in PAPR. Although callers are allowed to block while + * holding a work area, the amount of memory reserved for this purpose + * is limited, and allocations should be short-lived. A good guideline + * is to release any allocated work area before returning from a + * system call. + * + * This function does not fail. It blocks until the allocation + * succeeds. To prevent deadlocks, callers are discouraged from + * allocating more than one work area simultaneously in a single task + * context. + * + * Context: This function may sleep. + * Return: A &struct rtas_work_area descriptor for the allocated work area. + */ +#define rtas_work_area_alloc(size_) ({ \ + static_assert(__builtin_constant_p(size_)); \ + static_assert((size_) > 0); \ + static_assert((size_) <= RTAS_WORK_AREA_MAX_ALLOC_SZ); \ + __rtas_work_area_alloc(size_); \ +}) + +/* + * Do not call __rtas_work_area_alloc() directly. Use + * rtas_work_area_alloc(). + */ +struct rtas_work_area *__rtas_work_area_alloc(size_t size); + +/** + * rtas_work_area_free() - Release a work area. + * @area: Work area descriptor as returned from rtas_work_area_alloc(). + * + * Return a work area buffer to the pool. + */ +void rtas_work_area_free(struct rtas_work_area *area); + +static inline char *rtas_work_area_raw_buf(const struct rtas_work_area *area) +{ + return area->buf; +} + +static inline size_t rtas_work_area_size(const struct rtas_work_area *area) +{ + return area->size; +} + +static inline phys_addr_t rtas_work_area_phys(const struct rtas_work_area *area) +{ + return __pa(area->buf); +} + +/* + * Early setup for the work area allocator. Call from + * rtas_initialize() only. 
+ */ + +#ifdef CONFIG_PPC_PSERIES +void rtas_work_area_reserve_arena(phys_addr_t limit); +#else /* CONFIG_PPC_PSERIES */ +static inline void rtas_work_area_reserve_arena(phys_addr_t limit) {} +#endif /* CONFIG_PPC_PSERIES */ + +#endif /* _ASM_POWERPC_RTAS_WORK_AREA_H */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a6d0c46b4512..1f80f0b8a9ad 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1939,6 +1940,8 @@ void __init rtas_initialize(void) #endif ibm_open_errinjct_token = rtas_token("ibm,open-errinjct"); ibm_errinjct_token = rtas_token("ibm,errinjct"); + + rtas_work_area_reserve_arena(rtas_region); } int __init early_init_dt_scan_rtas(unsigned long node, diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 20a0f3c3fe04..8992b09a7a20 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ - of_helpers.o \ + of_helpers.o rtas-work-area.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o \ diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c new file mode 100644 index 000000000000..1ee63335bd4b --- /dev/null +++ b/arch/powerpc/platforms/pseries/rtas-work-area.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "rtas-work-area: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +enum { + /* + * Ensure the pool is page-aligned. + */ + RTAS_WORK_AREA_ARENA_ALIGN = PAGE_SIZE, + /* + * Don't let a single allocation claim the whole arena. + */ + RTAS_WORK_AREA_ARENA_SZ = RTAS_WORK_AREA_MAX_ALLOC_SZ * 2, + /* + * The smallest known work area size is for ibm,get-vpd's + * location code argument, which is limited to 79 characters + * plus 1 nul terminator. + * + * PAPR+ 7.3.20 ibm,get-vpd RTAS Call + * PAPR+ 12.3.2.4 Converged Location Code Rules - Length Restrictions + */ + RTAS_WORK_AREA_MIN_ALLOC_SZ = roundup_pow_of_two(80), +}; + +static struct { + struct gen_pool *gen_pool; + char *arena; + struct mutex mutex; /* serializes allocations */ + struct wait_queue_head wqh; + mempool_t descriptor_pool; + bool available; +} rwa_state = { + .mutex = __MUTEX_INITIALIZER(rwa_state.mutex), + .wqh = __WAIT_QUEUE_HEAD_INITIALIZER(rwa_state.wqh), +}; + +/* + * A single work area buffer and descriptor to serve requests early in + * boot before the allocator is fully initialized. We know 4KB is the + * most any boot time user needs (they all call ibm,get-system-parameter). 
+ */ +static bool early_work_area_in_use __initdata; +static char early_work_area_buf[SZ_4K] __initdata __aligned(SZ_4K); +static struct rtas_work_area early_work_area __initdata = { + .buf = early_work_area_buf, + .size = sizeof(early_work_area_buf), +}; + + +static struct rtas_work_area * __init rtas_work_area_alloc_early(size_t size) +{ + WARN_ON(size > early_work_area.size); + WARN_ON(early_work_area_in_use); + early_work_area_in_use = true; + memset(early_work_area.buf, 0, early_work_area.size); + return &early_work_area; +} + +static void __init rtas_work_area_free_early(struct rtas_work_area *work_area) +{ + WARN_ON(work_area != &early_work_area); + WARN_ON(!early_work_area_in_use); + early_work_area_in_use = false; +} + +struct rtas_work_area * __ref __rtas_work_area_alloc(size_t size) +{ + struct rtas_work_area *area; + unsigned long addr; + + might_sleep(); + + /* + * The rtas_work_area_alloc() wrapper enforces this at build + * time. Requests that exceed the arena size will block + * indefinitely. + */ + WARN_ON(size > RTAS_WORK_AREA_MAX_ALLOC_SZ); + + if (!rwa_state.available) + return rtas_work_area_alloc_early(size); + /* + * To ensure FCFS behavior and prevent a high rate of smaller + * requests from starving larger ones, use the mutex to queue + * allocations. + */ + mutex_lock(&rwa_state.mutex); + wait_event(rwa_state.wqh, + (addr = gen_pool_alloc(rwa_state.gen_pool, size)) != 0); + mutex_unlock(&rwa_state.mutex); + + area = mempool_alloc(&rwa_state.descriptor_pool, GFP_KERNEL); + area->buf = (char *)addr; + area->size = size; + + return area; +} + +void __ref rtas_work_area_free(struct rtas_work_area *area) +{ + if (!rwa_state.available) { + rtas_work_area_free_early(area); + return; + } + + gen_pool_free(rwa_state.gen_pool, (unsigned long)area->buf, area->size); + mempool_free(area, &rwa_state.descriptor_pool); + wake_up(&rwa_state.wqh); +} + +/* + * Initialization of the work area allocator happens in two parts. To + * reliably reserve an arena that satisfies RTAS addressing + * requirements, we must perform a memblock allocation early, + * immmediately after RTAS instantiation. Then we have to wait until + * the slab allocator is up before setting up the descriptor mempool + * and adding the arena to a gen_pool. + */ +static __init int rtas_work_area_allocator_init(void) +{ + const unsigned int order = ilog2(RTAS_WORK_AREA_MIN_ALLOC_SZ); + const phys_addr_t pa_start = __pa(rwa_state.arena); + const phys_addr_t pa_end = pa_start + RTAS_WORK_AREA_ARENA_SZ - 1; + struct gen_pool *pool; + const int nid = NUMA_NO_NODE; + int err; + + err = -ENOMEM; + if (!rwa_state.arena) + goto err_out; + + pool = gen_pool_create(order, nid); + if (!pool) + goto err_out; + /* + * All RTAS functions that consume work areas are OK with + * natural alignment, when they have alignment requirements at + * all. 
+ */ + gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL); + + err = gen_pool_add(pool, (unsigned long)rwa_state.arena, + RTAS_WORK_AREA_ARENA_SZ, nid); + if (err) + goto err_destroy; + + err = mempool_init_kmalloc_pool(&rwa_state.descriptor_pool, 1, + sizeof(struct rtas_work_area)); + if (err) + goto err_destroy; + + rwa_state.gen_pool = pool; + rwa_state.available = true; + + pr_debug("arena [%pa-%pa] (%uK), min/max alloc sizes %u/%u\n", + &pa_start, &pa_end, + RTAS_WORK_AREA_ARENA_SZ / SZ_1K, + RTAS_WORK_AREA_MIN_ALLOC_SZ, + RTAS_WORK_AREA_MAX_ALLOC_SZ); + + return 0; + +err_destroy: + gen_pool_destroy(pool); +err_out: + return err; +} +machine_arch_initcall(pseries, rtas_work_area_allocator_init); + +/** + * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas. + */ +void __init rtas_work_area_reserve_arena(const phys_addr_t limit) +{ + const phys_addr_t align = RTAS_WORK_AREA_ARENA_ALIGN; + const phys_addr_t size = RTAS_WORK_AREA_ARENA_SZ; + const phys_addr_t min = MEMBLOCK_LOW_LIMIT; + const int nid = NUMA_NO_NODE; + + /* + * Too early for a machine_is(pseries) check. But PAPR + * effectively mandates that ibm,get-system-parameter is + * present: + * + * R1–7.3.16–1. All platforms must support the System + * Parameters option. + * + * So set up the arena if we find that, with a fallback to + * ibm,configure-connector, just in case. + */ + if (rtas_service_present("ibm,get-system-parameter") || + rtas_service_present("ibm,configure-connector")) + rwa_state.arena = memblock_alloc_try_nid(size, align, min, limit, nid); +} -- cgit v1.2.3 From e27e14231eb541899efc11c33d6eeddcb74767c3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:01 -0600 Subject: powerpc/pseries/dlpar: use RTAS work area API Hold a work area object for the duration of the RTAS ibm,configure-connector sequence, eliminating locking and copying around each RTAS call. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-13-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/dlpar.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 498d6efcb5ae..9b65b50a5456 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -22,6 +22,7 @@ #include #include #include +#include static struct workqueue_struct *pseries_hp_wq; @@ -137,6 +138,7 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, struct property *property; struct property *last_property = NULL; struct cc_workarea *ccwa; + struct rtas_work_area *work_area; char *data_buf; int cc_token; int rc = -1; @@ -145,29 +147,18 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, if (cc_token == RTAS_UNKNOWN_SERVICE) return NULL; - data_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!data_buf) - return NULL; + work_area = rtas_work_area_alloc(SZ_4K); + data_buf = rtas_work_area_raw_buf(work_area); ccwa = (struct cc_workarea *)&data_buf[0]; ccwa->drc_index = drc_index; ccwa->zero = 0; do { - /* Since we release the rtas_data_buf lock between configure - * connector calls we want to re-populate the rtas_data_buffer - * with the contents of the previous call. 
- */ - spin_lock(&rtas_data_buf_lock); - - memcpy(rtas_data_buf, data_buf, RTAS_DATA_BUF_SIZE); - rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL); - memcpy(data_buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); - - spin_unlock(&rtas_data_buf_lock); - - if (rtas_busy_delay(rc)) - continue; + do { + rc = rtas_call(cc_token, 2, 1, NULL, + rtas_work_area_phys(work_area), NULL); + } while (rtas_busy_delay(rc)); switch (rc) { case COMPLETE: @@ -227,7 +218,7 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, } while (rc); cc_error: - kfree(data_buf); + rtas_work_area_free(work_area); if (rc) { if (first_dn) -- cgit v1.2.3 From 419e27f32b6dc13c3e6f443d1ad104f2845c444b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:02 -0600 Subject: powerpc/pseries: PAPR system parameter API Introduce a set of APIs for retrieving and updating PAPR system parameters. This encapsulates the toil of temporary RTAS work area management, RTAS function call retries, and translation of RTAS call statuses to conventional error values. There are several places in the kernel that already retrieve system parameters by calling the RTAS ibm,get-system-parameter function directly. These will be converted to papr_sysparm_get() in changes to follow. As for updating system parameters, current practice is to use sys_rtas() from user space; there are no in-kernel users of the RTAS ibm,set-system-parameter function. However this will become deprecated in time because it is not compatible with lockdown. The papr_sysparm_* APIs will form the common basis for in-kernel and user space access to system parameters. The code to expose the set/get capabilities to user space will follow. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-14-26929c8cce78@linux.ibm.com --- arch/powerpc/include/asm/papr-sysparm.h | 38 +++++++ arch/powerpc/platforms/pseries/Makefile | 2 +- arch/powerpc/platforms/pseries/papr-sysparm.c | 151 ++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/papr-sysparm.h create mode 100644 arch/powerpc/platforms/pseries/papr-sysparm.c diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h new file mode 100644 index 000000000000..f5fdbd8ae9db --- /dev/null +++ b/arch/powerpc/include/asm/papr-sysparm.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_PAPR_SYSPARM_H +#define _ASM_POWERPC_PAPR_SYSPARM_H + +typedef struct { + const u32 token; +} papr_sysparm_t; + +#define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, }) + +/* + * Derived from the "Defined Parameters" table in PAPR 7.3.16 System + * Parameters Option. Where the spec says "characteristics", we use + * "attrs" in the symbolic names to keep them from getting too + * unwieldy. 
+ */ +#define PAPR_SYSPARM_SHARED_PROC_LPAR_ATTRS mk_papr_sysparm(20) +#define PAPR_SYSPARM_PROC_MODULE_INFO mk_papr_sysparm(43) +#define PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS mk_papr_sysparm(44) +#define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS mk_papr_sysparm(50) +#define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55) + +enum { + PAPR_SYSPARM_MAX_INPUT = 1024, + PAPR_SYSPARM_MAX_OUTPUT = 4000, +}; + +struct papr_sysparm_buf { + __be16 len; + char val[PAPR_SYSPARM_MAX_OUTPUT]; +}; + +struct papr_sysparm_buf *papr_sysparm_buf_alloc(void); +void papr_sysparm_buf_free(struct papr_sysparm_buf *buf); +int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf); +int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf); + +#endif /* _ASM_POWERPC_PAPR_SYSPARM_H */ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 8992b09a7a20..53c3b91af2f7 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ - of_helpers.o rtas-work-area.o \ + of_helpers.o rtas-work-area.o papr-sysparm.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o \ diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c new file mode 100644 index 000000000000..2bb5c816399b --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-sysparm.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-sysparm: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +struct papr_sysparm_buf *papr_sysparm_buf_alloc(void) +{ + struct papr_sysparm_buf *buf = kzalloc(sizeof(*buf), GFP_KERNEL); + + return buf; +} + +void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) +{ + kfree(buf); +} + +/** + * papr_sysparm_get() - Retrieve the value of a PAPR system parameter. + * @param: PAPR system parameter token as described in + * 7.3.16 "System Parameters Option". + * @buf: A &struct papr_sysparm_buf as returned from papr_sysparm_buf_alloc(). + * + * Place the result of querying the specified parameter, if available, + * in @buf. The result includes a be16 length header followed by the + * value, which may be a string or binary data. See &struct papr_sysparm_buf. + * + * Since there is at least one parameter (60, OS Service Entitlement + * Status) where the results depend on the incoming contents of the + * work area, the caller-supplied buffer is copied unmodified into the + * work area before calling ibm,get-system-parameter. + * + * A defined parameter may not be implemented on a given system, and + * some implemented parameters may not be available to all partitions + * on a system. A parameter's disposition may change at any time due + * to system configuration changes or partition migration. + * + * Context: This function may sleep. + * + * Return: 0 on success, -errno otherwise. @buf is unmodified on error. 
+ */ + +int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) +{ + const s32 token = rtas_token("ibm,get-system-parameter"); + struct rtas_work_area *work_area; + s32 fwrc; + int ret; + + might_sleep(); + + if (WARN_ON(!buf)) + return -EFAULT; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + work_area = rtas_work_area_alloc(sizeof(*buf)); + + memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); + + do { + fwrc = rtas_call(token, 3, 1, NULL, param.token, + rtas_work_area_phys(work_area), + rtas_work_area_size(work_area)); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case 0: + ret = 0; + memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf)); + break; + case -3: /* parameter not implemented */ + ret = -EOPNOTSUPP; + break; + case -9002: /* this partition not authorized to retrieve this parameter */ + ret = -EPERM; + break; + case -9999: /* "parameter error" e.g. the buffer is too small */ + ret = -EINVAL; + break; + default: + pr_err("unexpected ibm,get-system-parameter result %d\n", fwrc); + fallthrough; + case -1: /* Hardware/platform error */ + ret = -EIO; + break; + } + + rtas_work_area_free(work_area); + + return ret; +} + +int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) +{ + const s32 token = rtas_token("ibm,set-system-parameter"); + struct rtas_work_area *work_area; + s32 fwrc; + int ret; + + might_sleep(); + + if (WARN_ON(!buf)) + return -EFAULT; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + work_area = rtas_work_area_alloc(sizeof(*buf)); + + memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); + + do { + fwrc = rtas_call(token, 2, 1, NULL, param.token, + rtas_work_area_phys(work_area)); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case 0: + ret = 0; + break; + case -3: /* parameter not supported */ + ret = -EOPNOTSUPP; + break; + case -9002: /* this partition not authorized to modify this parameter */ + ret = -EPERM; + break; + case -9999: /* "parameter error" e.g. invalid input data */ + ret = -EINVAL; + break; + default: + pr_err("unexpected ibm,set-system-parameter result %d\n", fwrc); + fallthrough; + case -1: /* Hardware/platform error */ + ret = -EIO; + break; + } + + rtas_work_area_free(work_area); + + return ret; +} -- cgit v1.2.3 From b8dc71774a51182185ae197ed2f8bd085ce6c848 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:03 -0600 Subject: powerpc/pseries: convert CMO probe to papr_sysparm API Convert the direct invocation of the ibm,get-system-parameter RTAS function to papr_sysparm_get(). 
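For reference, the typical calling pattern for the new API looks roughly
like this (an illustrative sketch only; the conversion below uses a
static __initdata buffer rather than papr_sysparm_buf_alloc()):

  struct papr_sysparm_buf *buf = papr_sysparm_buf_alloc();

  if (!buf)
          return;

  if (!papr_sysparm_get(PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS, buf)) {
          /* buf->len is a be16 length header, buf->val holds the value */
          pr_debug("CMO characteristics: %.*s\n",
                   be16_to_cpu(buf->len), buf->val);
  }

  papr_sysparm_buf_free(buf);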
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-15-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 74e50b6b28d4..420a2fa48292 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -941,32 +942,21 @@ void pSeries_coalesce_init(void) */ static void __init pSeries_cmo_feature_init(void) { - const s32 token = rtas_token("ibm,get-system-parameter"); + static struct papr_sysparm_buf buf __initdata; + static_assert(sizeof(buf.val) >= CMO_MAXLENGTH); char *ptr, *key, *value, *end; - int call_status; int page_order = IOMMU_PAGE_SHIFT_4K; pr_debug(" -> fw_cmo_feature_init()\n"); - do { - spin_lock(&rtas_data_buf_lock); - call_status = rtas_call(token, 3, 1, NULL, - CMO_CHARACTERISTICS_TOKEN, - __pa(rtas_data_buf), - RTAS_DATA_BUF_SIZE); - if (call_status == 0) - break; - spin_unlock(&rtas_data_buf_lock); - } while (rtas_busy_delay(call_status)); - - if (call_status != 0) { + if (papr_sysparm_get(PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS, &buf)) { pr_debug("CMO not available\n"); pr_debug(" <- fw_cmo_feature_init()\n"); return; } - end = rtas_data_buf + CMO_MAXLENGTH - 2; - ptr = rtas_data_buf + 2; /* step over strlen value */ + end = &buf.val[CMO_MAXLENGTH]; + ptr = &buf.val[0]; key = value = ptr; while (*ptr && (ptr <= end)) { @@ -1012,7 +1002,6 @@ static void __init pSeries_cmo_feature_init(void) } else pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); - spin_unlock(&rtas_data_buf_lock); pr_debug(" <- fw_cmo_feature_init()\n"); } -- cgit v1.2.3 From fff9846be00c467b4a277492af5be8487b6540e9 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:04 -0600 Subject: powerpc/pseries/lparcfg: convert to papr_sysparm API /proc/powerpc/lparcfg derives the LPAR name and SPLPAR characteristics it reports using bare calls to the RTAS ibm,get-system-parameter function. Convert these to the higher-level papr_sysparm API, which handles the tedious details. While the SPLPAR string parsing code could stand to be updated, that should be done in a separate change. It is minimally modified here to reduce the risk of changing behavior. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-16-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/lparcfg.c | 104 +++++++------------------------ 1 file changed, 24 insertions(+), 80 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index cd33d5800763..8acc70509520 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -311,16 +312,6 @@ static void parse_mpp_x_data(struct seq_file *m) seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); } -/* - * PAPR defines, in section "7.3.16 System Parameters Option", the token 55 to - * read the LPAR name, and the largest output data to 4000 + 2 bytes length. 
- */ -#define SPLPAR_LPAR_NAME_TOKEN 55 -#define GET_SYS_PARM_BUF_SIZE 4002 -#if GET_SYS_PARM_BUF_SIZE > RTAS_DATA_BUF_SIZE -#error "GET_SYS_PARM_BUF_SIZE is larger than RTAS_DATA_BUF_SIZE" -#endif - /* * Read the lpar name using the RTAS ibm,get-system-parameter call. * @@ -332,46 +323,19 @@ static void parse_mpp_x_data(struct seq_file *m) */ static int read_rtas_lpar_name(struct seq_file *m) { - int rc, len, token; - union { - char raw_buffer[GET_SYS_PARM_BUF_SIZE]; - struct { - __be16 len; - char name[GET_SYS_PARM_BUF_SIZE-2]; - }; - } *local_buffer; - - token = rtas_token("ibm,get-system-parameter"); - if (token == RTAS_UNKNOWN_SERVICE) - return -EINVAL; + struct papr_sysparm_buf *buf; + int err; - local_buffer = kmalloc(sizeof(*local_buffer), GFP_KERNEL); - if (!local_buffer) + buf = papr_sysparm_buf_alloc(); + if (!buf) return -ENOMEM; - do { - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, sizeof(*local_buffer)); - rc = rtas_call(token, 3, 1, NULL, SPLPAR_LPAR_NAME_TOKEN, - __pa(rtas_data_buf), sizeof(*local_buffer)); - if (!rc) - memcpy(local_buffer->raw_buffer, rtas_data_buf, - sizeof(local_buffer->raw_buffer)); - spin_unlock(&rtas_data_buf_lock); - } while (rtas_busy_delay(rc)); - - if (!rc) { - /* Force end of string */ - len = min((int) be16_to_cpu(local_buffer->len), - (int) sizeof(local_buffer->name)-1); - local_buffer->name[len] = '\0'; - - seq_printf(m, "partition_name=%s\n", local_buffer->name); - } else - rc = -ENODATA; + err = papr_sysparm_get(PAPR_SYSPARM_LPAR_NAME, buf); + if (!err) + seq_printf(m, "partition_name=%s\n", buf->val); - kfree(local_buffer); - return rc; + papr_sysparm_buf_free(buf); + return err; } /* @@ -397,7 +361,6 @@ static void read_lpar_name(struct seq_file *m) pr_err_once("Error can't get the LPAR name"); } -#define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) /* @@ -408,45 +371,25 @@ static void read_lpar_name(struct seq_file *m) */ static void parse_system_parameter_string(struct seq_file *m) { - const s32 token = rtas_token("ibm,get-system-parameter"); - int call_status; + struct papr_sysparm_buf *buf; - unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!local_buffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d\n", - __FILE__, __func__, __LINE__); + buf = papr_sysparm_buf_alloc(); + if (!buf) return; - } - do { - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); - call_status = rtas_call(token, 3, 1, NULL, SPLPAR_CHARACTERISTICS_TOKEN, - __pa(rtas_data_buf), RTAS_DATA_BUF_SIZE); - memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); - local_buffer[SPLPAR_MAXLENGTH - 1] = '\0'; - spin_unlock(&rtas_data_buf_lock); - } while (rtas_busy_delay(call_status)); - - if (call_status != 0) { - printk(KERN_INFO - "%s %s Error calling get-system-parameter (0x%x)\n", - __FILE__, __func__, call_status); + if (papr_sysparm_get(PAPR_SYSPARM_SHARED_PROC_LPAR_ATTRS, buf)) { + goto out_free; } else { + const char *local_buffer; int splpar_strlen; int idx, w_idx; char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!workbuffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d\n", - __FILE__, __func__, __LINE__); - kfree(local_buffer); - return; - } -#ifdef LPARCFG_DEBUG - printk(KERN_INFO "success calling get-system-parameter\n"); -#endif - splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; - local_buffer += 2; /* step over strlen value */ + + if (!workbuffer) + goto out_free; + + splpar_strlen = be16_to_cpu(buf->len); + local_buffer = 
buf->val; w_idx = 0; idx = 0; @@ -480,7 +423,8 @@ static void parse_system_parameter_string(struct seq_file *m) kfree(workbuffer); local_buffer -= 2; /* back up over strlen value */ } - kfree(local_buffer); +out_free: + papr_sysparm_buf_free(buf); } /* Return the number of processors in the system. -- cgit v1.2.3 From 69b9f5a5b2c04ce5993fe43da938f065571bdb25 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:05 -0600 Subject: powerpc/pseries/hv-24x7: convert to papr_sysparm API The new papr_sysparm API handles the details of system parameter retrieval. Use that instead of open-coding the RTAS call, work area management, and retries. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-17-26929c8cce78@linux.ibm.com --- arch/powerpc/perf/hv-24x7.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 5a8577447c9d..317175791d23 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -66,8 +67,6 @@ static bool is_physical_domain(unsigned int domain) * Refer PAPR+ document to get parameter token value as '43'. */ -#define PROCESSOR_MODULE_INFO 43 - static u32 phys_sockets; /* Physical sockets */ static u32 phys_chipspersocket; /* Physical chips per socket*/ static u32 phys_coresperchip; /* Physical cores per chip */ @@ -79,8 +78,7 @@ static u32 phys_coresperchip; /* Physical cores per chip */ */ void read_24x7_sys_info(void) { - const s32 token = rtas_token("ibm,get-system-parameter"); - int call_status; + struct papr_sysparm_buf *buf; /* * Making system parameter: chips and sockets and cores per chip @@ -90,27 +88,22 @@ void read_24x7_sys_info(void) phys_chipspersocket = 1; phys_coresperchip = 1; - do { - spin_lock(&rtas_data_buf_lock); - call_status = rtas_call(token, 3, 1, NULL, PROCESSOR_MODULE_INFO, - __pa(rtas_data_buf), RTAS_DATA_BUF_SIZE); - if (call_status == 0) { - int ntypes = be16_to_cpup((__be16 *)&rtas_data_buf[2]); - int len = be16_to_cpup((__be16 *)&rtas_data_buf[0]); - - if (len >= 8 && ntypes != 0) { - phys_sockets = be16_to_cpup((__be16 *)&rtas_data_buf[4]); - phys_chipspersocket = be16_to_cpup((__be16 *)&rtas_data_buf[6]); - phys_coresperchip = be16_to_cpup((__be16 *)&rtas_data_buf[8]); - } - } - spin_unlock(&rtas_data_buf_lock); - } while (rtas_busy_delay(call_status)); + buf = papr_sysparm_buf_alloc(); + if (!buf) + return; - if (call_status != 0) { - pr_err("Error calling get-system-parameter %d\n", - call_status); + if (!papr_sysparm_get(PAPR_SYSPARM_PROC_MODULE_INFO, buf)) { + int ntypes = be16_to_cpup((__be16 *)&buf->val[0]); + int len = be16_to_cpu(buf->len); + + if (len >= 8 && ntypes != 0) { + phys_sockets = be16_to_cpup((__be16 *)&buf->val[2]); + phys_chipspersocket = be16_to_cpup((__be16 *)&buf->val[4]); + phys_coresperchip = be16_to_cpup((__be16 *)&buf->val[6]); + } } + + papr_sysparm_buf_free(buf); } /* Domains for which more than one result element are returned for each event. */ -- cgit v1.2.3 From e58d9e17b11b776e32b1d3d80bdc63d39de3463d Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:06 -0600 Subject: powerpc/pseries/lpar: convert to papr_sysparm API Convert the TLB block invalidate characteristics discovery to the new papr_sysparm API. This occurs too early in boot to use papr_sysparm_buf_alloc(), so use a static buffer. 
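The resulting shape is roughly the following (a sketch; the actual
change is in the diff below):

  static struct papr_sysparm_buf buf __initdata;

  if (papr_sysparm_get(PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS, &buf))
          return;

  /* buf.len and buf.val are then parsed in place, no allocation needed */
  len = be16_to_cpu(buf.len);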
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-18-26929c8cce78@linux.ibm.com --- arch/powerpc/platforms/pseries/lpar.c | 37 +++++++++-------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 6597b2126ebb..2eab323f6970 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1469,8 +1470,6 @@ static inline void __init check_lp_set_hblkrm(unsigned int lp, } } -#define SPLPAR_TLB_BIC_TOKEN 50 - /* * The size of the TLB Block Invalidate Characteristics is variable. But at the * maximum it will be the number of possible page sizes *2 + 10 bytes. @@ -1481,42 +1480,24 @@ static inline void __init check_lp_set_hblkrm(unsigned int lp, void __init pseries_lpar_read_hblkrm_characteristics(void) { - const s32 token = rtas_token("ibm,get-system-parameter"); - unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH]; - int call_status, len, idx, bpsize; + static struct papr_sysparm_buf buf __initdata; + int len, idx, bpsize; if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) return; - do { - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE); - call_status = rtas_call(token, 3, 1, NULL, SPLPAR_TLB_BIC_TOKEN, - __pa(rtas_data_buf), RTAS_DATA_BUF_SIZE); - memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH); - local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0'; - spin_unlock(&rtas_data_buf_lock); - } while (rtas_busy_delay(call_status)); - - if (call_status != 0) { - pr_warn("%s %s Error calling get-system-parameter (0x%x)\n", - __FILE__, __func__, call_status); + if (papr_sysparm_get(PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS, &buf)) return; - } - /* - * The first two (2) bytes of the data in the buffer are the length of - * the returned data, not counting these first two (2) bytes. - */ - len = be16_to_cpu(*((u16 *)local_buffer)) + 2; + len = be16_to_cpu(buf.len); if (len > SPLPAR_TLB_BIC_MAXLENGTH) { pr_warn("%s too large returned buffer %d", __func__, len); return; } - idx = 2; + idx = 0; while (idx < len) { - u8 block_shift = local_buffer[idx++]; + u8 block_shift = buf.val[idx++]; u32 block_size; unsigned int npsize; @@ -1525,9 +1506,9 @@ void __init pseries_lpar_read_hblkrm_characteristics(void) block_size = 1 << block_shift; - for (npsize = local_buffer[idx++]; + for (npsize = buf.val[idx++]; npsize > 0 && idx < len; npsize--) - check_lp_set_hblkrm((unsigned int) local_buffer[idx++], + check_lp_set_hblkrm((unsigned int)buf.val[idx++], block_size); } -- cgit v1.2.3 From 716bfc97bd5fb7b442cdd06081f49df097f2e27b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:07 -0600 Subject: powerpc/rtas: introduce rtas_function_token() API Users of rtas_token() supply a string argument that can't be validated at build time. A typo or misspelling has to be caught by inspection or by observing wrong behavior at runtime. Since the core RTAS code now has consolidated the names of all possible RTAS functions and mapped them to their tokens, token lookup can be implemented using symbolic constants to index a static array. So introduce rtas_function_token(), a replacement API which does that, along with a rtas_service_present()-equivalent helper, rtas_function_implemented(). 
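For illustration, usage looks roughly like this (a sketch using the
names introduced below):

  s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER);

  if (token == RTAS_UNKNOWN_SERVICE)
          return -ENOENT;

  /* or, when only presence matters: */
  if (!rtas_function_implemented(RTAS_FN_IBM_OS_TERM))
          return;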
Callers supply an opaque predefined function handle which is used internally to index the function table. Typos or other inappropriate arguments yield build errors, and the function handle is a type that can't be easily confused with RTAS tokens or other integer types. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-19-26929c8cce78@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 99 +++++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/rtas.c | 28 +++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index f03891891a2d..3abe15ac79db 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -101,6 +101,100 @@ enum rtas_function_index { RTAS_FNIDX__WRITE_PCI_CONFIG, }; +/* + * Opaque handle for client code to refer to RTAS functions. All valid + * function handles are build-time constants prefixed with RTAS_FN_. + */ +typedef struct { + const enum rtas_function_index index; +} rtas_fn_handle_t; + + +#define rtas_fn_handle(x_) ((const rtas_fn_handle_t) { .index = x_, }) + +#define RTAS_FN_CHECK_EXCEPTION rtas_fn_handle(RTAS_FNIDX__CHECK_EXCEPTION) +#define RTAS_FN_DISPLAY_CHARACTER rtas_fn_handle(RTAS_FNIDX__DISPLAY_CHARACTER) +#define RTAS_FN_EVENT_SCAN rtas_fn_handle(RTAS_FNIDX__EVENT_SCAN) +#define RTAS_FN_FREEZE_TIME_BASE rtas_fn_handle(RTAS_FNIDX__FREEZE_TIME_BASE) +#define RTAS_FN_GET_POWER_LEVEL rtas_fn_handle(RTAS_FNIDX__GET_POWER_LEVEL) +#define RTAS_FN_GET_SENSOR_STATE rtas_fn_handle(RTAS_FNIDX__GET_SENSOR_STATE) +#define RTAS_FN_GET_TERM_CHAR rtas_fn_handle(RTAS_FNIDX__GET_TERM_CHAR) +#define RTAS_FN_GET_TIME_OF_DAY rtas_fn_handle(RTAS_FNIDX__GET_TIME_OF_DAY) +#define RTAS_FN_IBM_ACTIVATE_FIRMWARE rtas_fn_handle(RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE) +#define RTAS_FN_IBM_CBE_START_PTCAL rtas_fn_handle(RTAS_FNIDX__IBM_CBE_START_PTCAL) +#define RTAS_FN_IBM_CBE_STOP_PTCAL rtas_fn_handle(RTAS_FNIDX__IBM_CBE_STOP_PTCAL) +#define RTAS_FN_IBM_CHANGE_MSI rtas_fn_handle(RTAS_FNIDX__IBM_CHANGE_MSI) +#define RTAS_FN_IBM_CLOSE_ERRINJCT rtas_fn_handle(RTAS_FNIDX__IBM_CLOSE_ERRINJCT) +#define RTAS_FN_IBM_CONFIGURE_BRIDGE rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_BRIDGE) +#define RTAS_FN_IBM_CONFIGURE_CONNECTOR rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR) +#define RTAS_FN_IBM_CONFIGURE_KERNEL_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP) +#define RTAS_FN_IBM_CONFIGURE_PE rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_PE) +#define RTAS_FN_IBM_CREATE_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW) +#define RTAS_FN_IBM_DISPLAY_MESSAGE rtas_fn_handle(RTAS_FNIDX__IBM_DISPLAY_MESSAGE) +#define RTAS_FN_IBM_ERRINJCT rtas_fn_handle(RTAS_FNIDX__IBM_ERRINJCT) +#define RTAS_FN_IBM_EXTI2C rtas_fn_handle(RTAS_FNIDX__IBM_EXTI2C) +#define RTAS_FN_IBM_GET_CONFIG_ADDR_INFO rtas_fn_handle(RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO) +#define RTAS_FN_IBM_GET_CONFIG_ADDR_INFO2 rtas_fn_handle(RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2) +#define RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE rtas_fn_handle(RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE) +#define RTAS_FN_IBM_GET_INDICES rtas_fn_handle(RTAS_FNIDX__IBM_GET_INDICES) +#define RTAS_FN_IBM_GET_RIO_TOPOLOGY rtas_fn_handle(RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY) +#define RTAS_FN_IBM_GET_SYSTEM_PARAMETER rtas_fn_handle(RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER) +#define RTAS_FN_IBM_GET_VPD rtas_fn_handle(RTAS_FNIDX__IBM_GET_VPD) +#define RTAS_FN_IBM_GET_XIVE 
rtas_fn_handle(RTAS_FNIDX__IBM_GET_XIVE) +#define RTAS_FN_IBM_INT_OFF rtas_fn_handle(RTAS_FNIDX__IBM_INT_OFF) +#define RTAS_FN_IBM_INT_ON rtas_fn_handle(RTAS_FNIDX__IBM_INT_ON) +#define RTAS_FN_IBM_IO_QUIESCE_ACK rtas_fn_handle(RTAS_FNIDX__IBM_IO_QUIESCE_ACK) +#define RTAS_FN_IBM_LPAR_PERFTOOLS rtas_fn_handle(RTAS_FNIDX__IBM_LPAR_PERFTOOLS) +#define RTAS_FN_IBM_MANAGE_FLASH_IMAGE rtas_fn_handle(RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE) +#define RTAS_FN_IBM_MANAGE_STORAGE_PRESERVATION rtas_fn_handle(RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION) +#define RTAS_FN_IBM_NMI_INTERLOCK rtas_fn_handle(RTAS_FNIDX__IBM_NMI_INTERLOCK) +#define RTAS_FN_IBM_NMI_REGISTER rtas_fn_handle(RTAS_FNIDX__IBM_NMI_REGISTER) +#define RTAS_FN_IBM_OPEN_ERRINJCT rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_ERRINJCT) +#define RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE) +#define RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER) +#define RTAS_FN_IBM_OS_TERM rtas_fn_handle(RTAS_FNIDX__IBM_OS_TERM) +#define RTAS_FN_IBM_PARTNER_CONTROL rtas_fn_handle(RTAS_FNIDX__IBM_PARTNER_CONTROL) +#define RTAS_FN_IBM_PHYSICAL_ATTESTATION rtas_fn_handle(RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION) +#define RTAS_FN_IBM_PLATFORM_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_PLATFORM_DUMP) +#define RTAS_FN_IBM_POWER_OFF_UPS rtas_fn_handle(RTAS_FNIDX__IBM_POWER_OFF_UPS) +#define RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER rtas_fn_handle(RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER) +#define RTAS_FN_IBM_QUERY_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW) +#define RTAS_FN_IBM_READ_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__IBM_READ_PCI_CONFIG) +#define RTAS_FN_IBM_READ_SLOT_RESET_STATE rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE) +#define RTAS_FN_IBM_READ_SLOT_RESET_STATE2 rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2) +#define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW) +#define RTAS_FN_IBM_RESET_PE_DMA_WINDOWS rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS) +#define RTAS_FN_IBM_SCAN_LOG_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP) +#define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR) +#define RTAS_FN_IBM_SET_EEH_OPTION rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION) +#define RTAS_FN_IBM_SET_SLOT_RESET rtas_fn_handle(RTAS_FNIDX__IBM_SET_SLOT_RESET) +#define RTAS_FN_IBM_SET_SYSTEM_PARAMETER rtas_fn_handle(RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER) +#define RTAS_FN_IBM_SET_XIVE rtas_fn_handle(RTAS_FNIDX__IBM_SET_XIVE) +#define RTAS_FN_IBM_SLOT_ERROR_DETAIL rtas_fn_handle(RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL) +#define RTAS_FN_IBM_SUSPEND_ME rtas_fn_handle(RTAS_FNIDX__IBM_SUSPEND_ME) +#define RTAS_FN_IBM_TUNE_DMA_PARMS rtas_fn_handle(RTAS_FNIDX__IBM_TUNE_DMA_PARMS) +#define RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT) +#define RTAS_FN_IBM_UPDATE_NODES rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_NODES) +#define RTAS_FN_IBM_UPDATE_PROPERTIES rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_PROPERTIES) +#define RTAS_FN_IBM_VALIDATE_FLASH_IMAGE rtas_fn_handle(RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE) +#define RTAS_FN_IBM_WRITE_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__IBM_WRITE_PCI_CONFIG) +#define RTAS_FN_NVRAM_FETCH rtas_fn_handle(RTAS_FNIDX__NVRAM_FETCH) +#define RTAS_FN_NVRAM_STORE rtas_fn_handle(RTAS_FNIDX__NVRAM_STORE) +#define RTAS_FN_POWER_OFF rtas_fn_handle(RTAS_FNIDX__POWER_OFF) +#define RTAS_FN_PUT_TERM_CHAR 
rtas_fn_handle(RTAS_FNIDX__PUT_TERM_CHAR) +#define RTAS_FN_QUERY_CPU_STOPPED_STATE rtas_fn_handle(RTAS_FNIDX__QUERY_CPU_STOPPED_STATE) +#define RTAS_FN_READ_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__READ_PCI_CONFIG) +#define RTAS_FN_RTAS_LAST_ERROR rtas_fn_handle(RTAS_FNIDX__RTAS_LAST_ERROR) +#define RTAS_FN_SET_INDICATOR rtas_fn_handle(RTAS_FNIDX__SET_INDICATOR) +#define RTAS_FN_SET_POWER_LEVEL rtas_fn_handle(RTAS_FNIDX__SET_POWER_LEVEL) +#define RTAS_FN_SET_TIME_FOR_POWER_ON rtas_fn_handle(RTAS_FNIDX__SET_TIME_FOR_POWER_ON) +#define RTAS_FN_SET_TIME_OF_DAY rtas_fn_handle(RTAS_FNIDX__SET_TIME_OF_DAY) +#define RTAS_FN_START_CPU rtas_fn_handle(RTAS_FNIDX__START_CPU) +#define RTAS_FN_STOP_SELF rtas_fn_handle(RTAS_FNIDX__STOP_SELF) +#define RTAS_FN_SYSTEM_REBOOT rtas_fn_handle(RTAS_FNIDX__SYSTEM_REBOOT) +#define RTAS_FN_THAW_TIME_BASE rtas_fn_handle(RTAS_FNIDX__THAW_TIME_BASE) +#define RTAS_FN_WRITE_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__WRITE_PCI_CONFIG) + #define RTAS_UNKNOWN_SERVICE (-1) #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */ @@ -307,6 +401,11 @@ extern void (*rtas_flash_term_hook)(int); extern struct rtas_t rtas; +s32 rtas_function_token(const rtas_fn_handle_t handle); +static inline bool rtas_function_implemented(const rtas_fn_handle_t handle) +{ + return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE; +} extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1f80f0b8a9ad..bb6f5370c279 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -453,6 +453,32 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { }, }; +/** + * rtas_function_token() - RTAS function token lookup. + * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN. + * + * Context: Any context. + * Return: the token value for the function if implemented by this platform, + * otherwise RTAS_UNKNOWN_SERVICE. + */ +s32 rtas_function_token(const rtas_fn_handle_t handle) +{ + const size_t index = handle.index; + const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table); + + if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index)) + return RTAS_UNKNOWN_SERVICE; + /* + * Various drivers attempt token lookups on non-RTAS + * platforms. + */ + if (!rtas.dev) + return RTAS_UNKNOWN_SERVICE; + + return rtas_function_table[index].token; +} +EXPORT_SYMBOL_GPL(rtas_function_token); + static int rtas_function_cmp(const void *a, const void *b) { const struct rtas_function *f1 = a; @@ -1011,7 +1037,7 @@ static int ibm_errinjct_token; * @....: List of @nargs input parameters. * * Invokes the RTAS function indicated by @token, which the caller - * should obtain via rtas_token(). + * should obtain via rtas_function_token(). * * The @nargs and @nret arguments must match the number of input and * output parameters specified for the RTAS function. -- cgit v1.2.3 From 08273c9f619cb32fb041935724f576e607101f3b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 10 Feb 2023 12:42:08 -0600 Subject: powerpc/rtas: arch-wide function token lookup conversions With the tokens for all implemented RTAS functions now available via rtas_function_token(), which is optimal and safe for arbitrary contexts, there is no need to use rtas_token() or cache its result. 
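For reference, the basic substitution looks like this (sketch):

  token = rtas_token("start-cpu");

becomes:

  token = rtas_function_token(RTAS_FN_START_CPU);

and presence checks such as rtas_service_present("system-reboot") become
rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT).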
Most conversions are trivial, but a few are worth describing in more detail: * Error injection token comparisons for lockdown purposes are consolidated into a simple predicate: token_is_restricted_errinjct(). * A couple of special cases in block_rtas_call() do not use rtas_token() but perform string comparisons against names in the function table. These are converted to compare against token values instead, which is logically equivalent but less expensive. * The lookup for the ibm,os-term token can be deferred until needed, instead of caching it at boot to avoid device tree traversal during panic. * Since rtas_function_token() accesses a read-only data structure without taking any locks, xmon's lookup of set-indicator can be performed as needed instead of cached at startup. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230125-b4-powerpc-rtas-queue-v3-20-26929c8cce78@linux.ibm.com --- arch/powerpc/kernel/rtas-proc.c | 24 ++++---- arch/powerpc/kernel/rtas-rtc.c | 6 +- arch/powerpc/kernel/rtas.c | 79 ++++++++++++------------- arch/powerpc/kernel/rtas_flash.c | 21 ++++--- arch/powerpc/kernel/rtas_pci.c | 8 +-- arch/powerpc/kernel/rtasd.c | 2 +- arch/powerpc/platforms/52xx/efika.c | 4 +- arch/powerpc/platforms/cell/ras.c | 4 +- arch/powerpc/platforms/cell/smp.c | 4 +- arch/powerpc/platforms/chrp/nvram.c | 4 +- arch/powerpc/platforms/chrp/pci.c | 4 +- arch/powerpc/platforms/chrp/setup.c | 4 +- arch/powerpc/platforms/maple/setup.c | 4 +- arch/powerpc/platforms/pseries/dlpar.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 22 +++---- arch/powerpc/platforms/pseries/hotplug-cpu.c | 4 +- arch/powerpc/platforms/pseries/io_event_irq.c | 2 +- arch/powerpc/platforms/pseries/mobility.c | 4 +- arch/powerpc/platforms/pseries/msi.c | 4 +- arch/powerpc/platforms/pseries/nvram.c | 4 +- arch/powerpc/platforms/pseries/papr-sysparm.c | 4 +- arch/powerpc/platforms/pseries/pci.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- arch/powerpc/platforms/pseries/rtas-work-area.c | 4 +- arch/powerpc/platforms/pseries/setup.c | 8 +-- arch/powerpc/platforms/pseries/smp.c | 6 +- arch/powerpc/sysdev/xics/ics-rtas.c | 8 +-- arch/powerpc/xmon/xmon.c | 16 +---- 28 files changed, 123 insertions(+), 137 deletions(-) diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 081b2b741a8c..9454b8395b6a 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -287,9 +287,9 @@ static ssize_t ppc_rtas_poweron_write(struct file *file, rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_FOR_POWER_ON), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); if (error) printk(KERN_WARNING "error: setting poweron time returned: %s\n", ppc_rtas_process_error(error)); @@ -350,9 +350,9 @@ static ssize_t ppc_rtas_clock_write(struct file *file, return error; rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); if (error) printk(KERN_WARNING "error: setting the clock returned: %s\n", 
ppc_rtas_process_error(error)); @@ -362,7 +362,7 @@ static ssize_t ppc_rtas_clock_write(struct file *file, static int ppc_rtas_clock_show(struct seq_file *m, void *v) { int ret[8]; - int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + int error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); if (error) { printk(KERN_WARNING "error: reading the clock returned: %s\n", @@ -385,7 +385,7 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v) { int i,j; int state, error; - int get_sensor_state = rtas_token("get-sensor-state"); + int get_sensor_state = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n"); seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n"); @@ -708,8 +708,8 @@ static ssize_t ppc_rtas_tone_freq_write(struct file *file, return error; rtas_tone_frequency = freq; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_FREQUENCY, 0, freq); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_FREQUENCY, 0, freq); if (error) printk(KERN_WARNING "error: setting tone frequency returned: %s\n", ppc_rtas_process_error(error)); @@ -736,8 +736,8 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file, volume = 100; rtas_tone_volume = volume; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_VOLUME, 0, volume); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_VOLUME, 0, volume); if (error) printk(KERN_WARNING "error: setting tone volume returned: %s\n", ppc_rtas_process_error(error)); diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index 5a31d1829bca..6996214532bd 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -21,7 +21,7 @@ time64_t __init rtas_get_boot_time(void) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -53,7 +53,7 @@ void rtas_get_rtc_time(struct rtc_time *rtc_tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -90,7 +90,7 @@ int rtas_set_rtc_time(struct rtc_time *tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, 0); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index bb6f5370c279..31175b34856a 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -782,8 +782,8 @@ void rtas_progress(char *s, unsigned short hex) "ibm,display-truncation-length", NULL); of_node_put(root); } - display_character = rtas_token("display-character"); - set_indicator = rtas_token("set-indicator"); + display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER); + set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR); } if (display_character == RTAS_UNKNOWN_SERVICE) { @@ -937,7 +937,6 @@ static void __init init_error_log_max(void) static 
char rtas_err_buf[RTAS_ERROR_LOG_MAX]; -static int rtas_last_error_token; /** Return a copy of the detailed error text associated with the * most recent failed call to rtas. Because the error text @@ -947,16 +946,17 @@ static int rtas_last_error_token; */ static char *__fetch_rtas_last_error(char *altbuf) { + const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR); struct rtas_args err_args, save_args; u32 bufsz; char *buf = NULL; - if (rtas_last_error_token == -1) + if (token == -1) return NULL; bufsz = rtas_get_error_log_max(); - err_args.token = cpu_to_be32(rtas_last_error_token); + err_args.token = cpu_to_be32(token); err_args.nargs = cpu_to_be32(2); err_args.nret = cpu_to_be32(1); err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf)); @@ -1025,8 +1025,11 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, va_end(list); } -static int ibm_open_errinjct_token; -static int ibm_errinjct_token; +static bool token_is_restricted_errinjct(s32 token) +{ + return token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) || + token == rtas_function_token(RTAS_FN_IBM_ERRINJCT); +} /** * rtas_call() - Invoke an RTAS firmware function. @@ -1098,7 +1101,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) return -1; - if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + if (token_is_restricted_errinjct(token)) { /* * It would be nicer to not discard the error value * from security_locked_down(), but callers expect an @@ -1330,7 +1333,7 @@ static int rtas_error_rc(int rtas_rc) int rtas_get_power_level(int powerdomain, int *level) { - int token = rtas_token("get-power-level"); + int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -1347,7 +1350,7 @@ EXPORT_SYMBOL_GPL(rtas_get_power_level); int rtas_set_power_level(int powerdomain, int level, int *setlevel) { - int token = rtas_token("set-power-level"); + int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -1365,7 +1368,7 @@ EXPORT_SYMBOL_GPL(rtas_set_power_level); int rtas_get_sensor(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -1383,7 +1386,7 @@ EXPORT_SYMBOL_GPL(rtas_get_sensor); int rtas_get_sensor_fast(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -1425,7 +1428,7 @@ bool rtas_indicator_present(int token, int *maxindex) int rtas_set_indicator(int indicator, int index, int new_value) { - int token = rtas_token("set-indicator"); + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -1446,8 +1449,8 @@ EXPORT_SYMBOL_GPL(rtas_set_indicator); */ int rtas_set_indicator_fast(int indicator, int index, int new_value) { + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; - int token = rtas_token("set-indicator"); if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; @@ -1489,10 +1492,11 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) */ int rtas_ibm_suspend_me(int *fw_status) { + int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME); int fwrc; int ret; - fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + fwrc = rtas_call(token, 0, 1, NULL); switch (fwrc) { case 
0: @@ -1525,7 +1529,7 @@ void __noreturn rtas_restart(char *cmd) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); pr_emerg("system-reboot returned %d\n", - rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL)); for (;;); } @@ -1535,7 +1539,7 @@ void rtas_power_off(void) rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ pr_emerg("power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } @@ -1545,16 +1549,17 @@ void __noreturn rtas_halt(void) rtas_flash_term_hook(SYS_HALT); /* allow power on only with power button press */ pr_emerg("power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } /* Must be in the RMO region, so we place it here */ static char rtas_os_term_buf[2048]; -static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE; +static bool ibm_extended_os_term; void rtas_os_term(char *str) { + s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM); int status; /* @@ -1563,7 +1568,8 @@ void rtas_os_term(char *str) * this property may terminate the partition which we want to avoid * since it interferes with panic_timeout. */ - if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE) + + if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); @@ -1574,8 +1580,7 @@ void rtas_os_term(char *str) * schedules. */ do { - status = rtas_call(ibm_os_term_token, 1, 1, NULL, - __pa(rtas_os_term_buf)); + status = rtas_call(token, 1, 1, NULL, __pa(rtas_os_term_buf)); } while (rtas_busy_delay_time(status)); if (status != 0) @@ -1595,10 +1600,9 @@ void rtas_os_term(char *str) */ void rtas_activate_firmware(void) { - int token; + int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE); int fwrc; - token = rtas_token("ibm,activate-firmware"); if (token == RTAS_UNKNOWN_SERVICE) { pr_notice("ibm,activate-firmware method unavailable\n"); return; @@ -1684,6 +1688,8 @@ static bool block_rtas_call(int token, int nargs, { const struct rtas_function *func; const struct rtas_filter *f; + const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP); + const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR); u32 base, size, end; /* @@ -1720,8 +1726,7 @@ static bool block_rtas_call(int token, int nargs, * Special case for ibm,platform-dump - NULL buffer * address is used to indicate end of dump processing */ - if (!strcmp(func->name, "ibm,platform-dump") && - base == 0) + if (is_platform_dump && base == 0) return false; if (!in_rmo_buf(base, end)) @@ -1742,8 +1747,7 @@ static bool block_rtas_call(int token, int nargs, * Special case for ibm,configure-connector where the * address can be 0 */ - if (!strcmp(func->name, "ibm,configure-connector") && - base == 0) + if (is_config_conn && base == 0) return false; if (!in_rmo_buf(base, end)) @@ -1798,7 +1802,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) if (block_rtas_call(token, nargs, &args)) return -EINVAL; - if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + if (token_is_restricted_errinjct(token)) { int err; err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION); @@ -1807,7 +1811,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) } /* Need to handle ibm,suspend_me 
call specially */ - if (token == rtas_token("ibm,suspend-me")) { + if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) { /* * rtas_ibm_suspend_me assumes the streamid handle is in cpu @@ -1942,11 +1946,10 @@ void __init rtas_initialize(void) rtas_function_table_init(); /* - * Discover these now to avoid device tree lookups in the + * Discover this now to avoid a device tree lookup in the * panic path. */ - if (of_property_read_bool(rtas.dev, "ibm,extended-os-term")) - ibm_os_term_token = rtas_token("ibm,os-term"); + ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term"); /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any @@ -1961,12 +1964,6 @@ void __init rtas_initialize(void) panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", PAGE_SIZE, &rtas_region); -#ifdef CONFIG_RTAS_ERROR_LOGGING - rtas_last_error_token = rtas_token("rtas-last-error"); -#endif - ibm_open_errinjct_token = rtas_token("ibm,open-errinjct"); - ibm_errinjct_token = rtas_token("ibm,errinjct"); - rtas_work_area_reserve_arena(rtas_region); } @@ -2022,13 +2019,13 @@ void rtas_give_timebase(void) raw_spin_lock_irqsave(&timebase_lock, flags); hard_irq_disable(); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL); timebase = get_tb(); raw_spin_unlock(&timebase_lock); while (timebase) barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL); local_irq_restore(flags); } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index bc817a5619d6..4caf5e3079eb 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -376,7 +376,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf, unsigned int op) s32 rc; do { - rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, 1, + rc = rtas_call(rtas_function_token(RTAS_FN_IBM_MANAGE_FLASH_IMAGE), 1, 1, NULL, op); } while (rtas_busy_delay(rc)); @@ -444,7 +444,7 @@ error: */ static void validate_flash(struct rtas_validate_flash_t *args_buf) { - int token = rtas_token("ibm,validate-flash-image"); + int token = rtas_function_token(RTAS_FN_IBM_VALIDATE_FLASH_IMAGE); int update_results; s32 rc; @@ -570,7 +570,7 @@ static void rtas_flash_firmware(int reboot_type) return; } - update_token = rtas_token("ibm,update-flash-64-and-reboot"); + update_token = rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT); if (update_token == RTAS_UNKNOWN_SERVICE) { printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot " "is not available -- not a service partition?\n"); @@ -653,7 +653,7 @@ static void rtas_flash_firmware(int reboot_type) */ struct rtas_flash_file { const char *filename; - const char *rtas_call_name; + const rtas_fn_handle_t handle; int *status; const struct proc_ops ops; }; @@ -661,7 +661,7 @@ struct rtas_flash_file { static const struct rtas_flash_file rtas_flash_files[] = { { .filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, .status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_msg, .ops.proc_write = rtas_flash_write, @@ -670,7 +670,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, 
.status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_num, .ops.proc_write = rtas_flash_write, @@ -679,7 +679,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" VALIDATE_FLASH_NAME, - .rtas_call_name = "ibm,validate-flash-image", + .handle = RTAS_FN_IBM_VALIDATE_FLASH_IMAGE, .status = &rtas_validate_flash_data.status, .ops.proc_read = validate_flash_read, .ops.proc_write = validate_flash_write, @@ -688,7 +688,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" MANAGE_FLASH_NAME, - .rtas_call_name = "ibm,manage-flash-image", + .handle = RTAS_FN_IBM_MANAGE_FLASH_IMAGE, .status = &rtas_manage_flash_data.status, .ops.proc_read = manage_flash_read, .ops.proc_write = manage_flash_write, @@ -700,8 +700,7 @@ static int __init rtas_flash_init(void) { int i; - if (rtas_token("ibm,update-flash-64-and-reboot") == - RTAS_UNKNOWN_SERVICE) { + if (rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT) == RTAS_UNKNOWN_SERVICE) { pr_info("rtas_flash: no firmware flash support\n"); return -EINVAL; } @@ -730,7 +729,7 @@ static int __init rtas_flash_init(void) * This code assumes that the status int is the first member of the * struct */ - token = rtas_token(f->rtas_call_name); + token = rtas_function_token(f->handle); if (token == RTAS_UNKNOWN_SERVICE) *f->status = FLASH_AUTH; else diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 5a2f5ea3b054..e1fdc7473b72 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -191,10 +191,10 @@ static void python_countermeasures(struct device_node *dev) void __init init_pci_config_tokens(void) { - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); + read_pci_config = rtas_function_token(RTAS_FN_READ_PCI_CONFIG); + write_pci_config = rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG); + ibm_read_pci_config = rtas_function_token(RTAS_FN_IBM_READ_PCI_CONFIG); + ibm_write_pci_config = rtas_function_token(RTAS_FN_IBM_WRITE_PCI_CONFIG); } unsigned long get_phb_buid(struct device_node *phb) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index cc56ac6ba4b0..9bba469239fc 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -506,7 +506,7 @@ static int __init rtas_event_scan_init(void) return 0; /* No RTAS */ - event_scan = rtas_token("event-scan"); + event_scan = rtas_function_token(RTAS_FN_EVENT_SCAN); if (event_scan == RTAS_UNKNOWN_SERVICE) { printk(KERN_INFO "rtasd: No event-scan on system\n"); return -ENODEV; diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index e0647720ed5e..61dfec74ff85 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -41,7 +41,7 @@ static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int ret = -1; int rval; - rval = rtas_call(rtas_token("read-pci-config"), 2, 2, &ret, addr, len); + rval = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &ret, addr, len); *val = ret; return rval ? 
PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; } @@ -55,7 +55,7 @@ static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, | (hose->global_number << 24); int rval; - rval = rtas_call(rtas_token("write-pci-config"), 3, 1, NULL, + rval = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL, addr, len, val); return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; } diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 8d934ea6270c..98db63b72d56 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -297,8 +297,8 @@ int cbe_sysreset_hack(void) static int __init cbe_ptcal_init(void) { int ret; - ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal"); - ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal"); + ptcal_start_tok = rtas_function_token(RTAS_FN_IBM_CBE_START_PTCAL); + ptcal_stop_tok = rtas_function_token(RTAS_FN_IBM_CBE_STOP_PTCAL); if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE) diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index 31ce00b52a32..30394c6f8894 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -81,7 +81,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. */ - start_cpu = rtas_token("start-cpu"); + start_cpu = rtas_function_token(RTAS_FN_START_CPU); if (start_cpu == RTAS_UNKNOWN_SERVICE) return 1; @@ -152,7 +152,7 @@ void __init smp_init_cell(void) cpumask_clear_cpu(boot_cpuid, &of_spin_map); /* Non-lpar has additional take/give timebase */ - if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { + if (rtas_function_token(RTAS_FN_FREEZE_TIME_BASE) != RTAS_UNKNOWN_SERVICE) { smp_ops->give_timebase = rtas_give_timebase; smp_ops->take_timebase = rtas_take_timebase; } diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index dab78076fedb..0eedae96498c 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -31,7 +31,7 @@ static unsigned char chrp_nvram_read_val(int addr) return 0xff; } spin_lock_irqsave(&nvram_lock, flags); - if ((rtas_call(rtas_token("nvram-fetch"), 3, 2, &done, addr, + if ((rtas_call(rtas_function_token(RTAS_FN_NVRAM_FETCH), 3, 2, &done, addr, __pa(nvram_buf), 1) != 0) || 1 != done) ret = 0xff; else @@ -53,7 +53,7 @@ static void chrp_nvram_write_val(int addr, unsigned char val) } spin_lock_irqsave(&nvram_lock, flags); nvram_buf[0] = val; - if ((rtas_call(rtas_token("nvram-store"), 3, 2, &done, addr, + if ((rtas_call(rtas_function_token(RTAS_FN_NVRAM_STORE), 3, 2, &done, addr, __pa(nvram_buf), 1) != 0) || 1 != done) printk(KERN_DEBUG "rtas IO error storing 0x%02x at %d", val, addr); spin_unlock_irqrestore(&nvram_lock, flags); diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 6f6598e771ff..428fd2a7b3ee 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -104,7 +104,7 @@ static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int ret = -1; int rval; - rval = rtas_call(rtas_token("read-pci-config"), 2, 2, &ret, addr, len); + rval = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &ret, addr, len); *val = ret; return rval? 
PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL; } @@ -118,7 +118,7 @@ static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, int offset | (hose->global_number << 24); int rval; - rval = rtas_call(rtas_token("write-pci-config"), 3, 1, NULL, + rval = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL, addr, len, val); return rval? PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL; } diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index ec63c0558db6..d9049ceb1046 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -323,11 +323,11 @@ static void __init chrp_setup_arch(void) printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]); rtas_initialize(); - if (rtas_token("display-character") >= 0) + if (rtas_function_token(RTAS_FN_DISPLAY_CHARACTER) >= 0) ppc_md.progress = rtas_progress; /* use RTAS time-of-day routines if available */ - if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) { + if (rtas_function_token(RTAS_FN_GET_TIME_OF_DAY) != RTAS_UNKNOWN_SERVICE) { ppc_md.get_boot_time = rtas_get_boot_time; ppc_md.get_rtc_time = rtas_get_rtc_time; ppc_md.set_rtc_time = rtas_set_rtc_time; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index c26c379e1cc8..98c8e3603064 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -162,8 +162,8 @@ static struct smp_ops_t maple_smp_ops = { static void __init maple_use_rtas_reboot_and_halt_if_present(void) { - if (rtas_service_present("system-reboot") && - rtas_service_present("power-off")) { + if (rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT) && + rtas_function_implemented(RTAS_FN_POWER_OFF)) { ppc_md.restart = rtas_restart; pm_power_off = rtas_power_off; ppc_md.halt = rtas_halt; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 9b65b50a5456..75ffdbcd2865 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -143,7 +143,7 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, int cc_token; int rc = -1; - cc_token = rtas_token("ibm,configure-connector"); + cc_token = rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR); if (cc_token == RTAS_UNKNOWN_SERVICE) return NULL; diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 6b507b62ce8f..def184da51cf 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -699,7 +699,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u static int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs) { int rc; - int ibm_allow_unfreeze = rtas_token("ibm,open-sriov-allow-unfreeze"); + int ibm_allow_unfreeze = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE); unsigned long buid, addr; addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); @@ -774,7 +774,7 @@ static int pseries_notify_resume(struct eeh_dev *edev) if (!edev) return -EEXIST; - if (rtas_token("ibm,open-sriov-allow-unfreeze") == RTAS_UNKNOWN_SERVICE) + if (rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE) == RTAS_UNKNOWN_SERVICE) return -EINVAL; if (edev->pdev->is_physfn || edev->pdev->is_virtfn) @@ -815,14 +815,14 @@ static int __init eeh_pseries_init(void) int ret, config_addr; /* figure out EEH RTAS function call tokens */ - ibm_set_eeh_option = 
rtas_token("ibm,set-eeh-option"); - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); - ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); - ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); - ibm_configure_pe = rtas_token("ibm,configure-pe"); + ibm_set_eeh_option = rtas_function_token(RTAS_FN_IBM_SET_EEH_OPTION); + ibm_set_slot_reset = rtas_function_token(RTAS_FN_IBM_SET_SLOT_RESET); + ibm_read_slot_reset_state2 = rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE2); + ibm_read_slot_reset_state = rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE); + ibm_slot_error_detail = rtas_function_token(RTAS_FN_IBM_SLOT_ERROR_DETAIL); + ibm_get_config_addr_info2 = rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO2); + ibm_get_config_addr_info = rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO); + ibm_configure_pe = rtas_function_token(RTAS_FN_IBM_CONFIGURE_PE); /* * ibm,configure-pe and ibm,configure-bridge have the same semantics, @@ -830,7 +830,7 @@ static int __init eeh_pseries_init(void) * ibm,configure-pe then fall back to using ibm,configure-bridge. */ if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) - ibm_configure_pe = rtas_token("ibm,configure-bridge"); + ibm_configure_pe = rtas_function_token(RTAS_FN_IBM_CONFIGURE_BRIDGE); /* * Necessary sanity check. We needn't check "get-config-addr-info" diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 090ae5a1e0f5..982e5e4b5e06 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -855,8 +855,8 @@ static int __init pseries_cpu_hotplug_init(void) ppc_md.cpu_release = dlpar_cpu_release; #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ - rtas_stop_self_token = rtas_token("stop-self"); - qcss_tok = rtas_token("query-cpu-stopped-state"); + rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF); + qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE); if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE || qcss_tok == RTAS_UNKNOWN_SERVICE) { diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c index 7b74d4d34e9a..f411d4fe7b24 100644 --- a/arch/powerpc/platforms/pseries/io_event_irq.c +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -143,7 +143,7 @@ static int __init ioei_init(void) { struct device_node *np; - ioei_check_exception_token = rtas_token("check-exception"); + ioei_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) return -ENODEV; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 4cea71aa0f41..643d309d1bd0 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -195,7 +195,7 @@ static int update_dt_node(struct device_node *dn, s32 scope) u32 nprops; u32 vd; - update_properties_token = rtas_token("ibm,update-properties"); + update_properties_token = rtas_function_token(RTAS_FN_IBM_UPDATE_PROPERTIES); if (update_properties_token == RTAS_UNKNOWN_SERVICE) return -EINVAL; @@ -306,7 +306,7 @@ static int pseries_devicetree_update(s32 scope) int update_nodes_token; int rc; - update_nodes_token = rtas_token("ibm,update-nodes"); + 
update_nodes_token = rtas_function_token(RTAS_FN_IBM_UPDATE_NODES); if (update_nodes_token == RTAS_UNKNOWN_SERVICE) return 0; diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 3f05507e444d..423ee1d5bd94 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -679,8 +679,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) static int rtas_msi_init(void) { - query_token = rtas_token("ibm,query-interrupt-source-number"); - change_token = rtas_token("ibm,change-msi"); + query_token = rtas_function_token(RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER); + change_token = rtas_function_token(RTAS_FN_IBM_CHANGE_MSI); if ((query_token == RTAS_UNKNOWN_SERVICE) || (change_token == RTAS_UNKNOWN_SERVICE)) { diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index cbf1720eb4aa..8130c37962c0 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -227,8 +227,8 @@ int __init pSeries_nvram_init(void) nvram_size = be32_to_cpup(nbytes_p); - nvram_fetch = rtas_token("nvram-fetch"); - nvram_store = rtas_token("nvram-store"); + nvram_fetch = rtas_function_token(RTAS_FN_NVRAM_FETCH); + nvram_store = rtas_function_token(RTAS_FN_NVRAM_STORE); printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); of_node_put(nvram); diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c index 2bb5c816399b..fedc61599e6c 100644 --- a/arch/powerpc/platforms/pseries/papr-sysparm.c +++ b/arch/powerpc/platforms/pseries/papr-sysparm.c @@ -50,7 +50,7 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) { - const s32 token = rtas_token("ibm,get-system-parameter"); + const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER); struct rtas_work_area *work_area; s32 fwrc; int ret; @@ -102,7 +102,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) { - const s32 token = rtas_token("ibm,set-system-parameter"); + const s32 token = rtas_function_token(RTAS_FN_IBM_SET_SYSTEM_PARAMETER); struct rtas_work_area *work_area; s32 fwrc; int ret; diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 6e671c3809ec..60e0a58928ef 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -60,7 +60,7 @@ static int pseries_send_map_pe(struct pci_dev *pdev, u16 num_vfs, struct pci_dn *pdn; int rc; unsigned long buid, addr; - int ibm_map_pes = rtas_token("ibm,open-sriov-map-pe-number"); + int ibm_map_pes = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER); if (ibm_map_pes == RTAS_UNKNOWN_SERVICE) return -EINVAL; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f12516c3998c..adafd593d9d3 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -155,7 +155,7 @@ static int __init init_ras_IRQ(void) { struct device_node *np; - ras_check_exception_token = rtas_token("check-exception"); + ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); /* Internal Errors */ np = of_find_node_by_path("/event-sources/internal-errors"); diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c index 
1ee63335bd4b..b37d52f40360 100644 --- a/arch/powerpc/platforms/pseries/rtas-work-area.c +++ b/arch/powerpc/platforms/pseries/rtas-work-area.c @@ -203,7 +203,7 @@ void __init rtas_work_area_reserve_arena(const phys_addr_t limit) * So set up the arena if we find that, with a fallback to * ibm,configure-connector, just in case. */ - if (rtas_service_present("ibm,get-system-parameter") || - rtas_service_present("ibm,configure-connector")) + if (rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER) || + rtas_function_implemented(RTAS_FN_IBM_CONFIGURE_CONNECTOR)) rwa_state.arena = memblock_alloc_try_nid(size, align, min, limit, nid); } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 420a2fa48292..4a0cec8cf623 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -136,11 +136,11 @@ static void __init fwnmi_init(void) #endif int ibm_nmi_register_token; - ibm_nmi_register_token = rtas_token("ibm,nmi-register"); + ibm_nmi_register_token = rtas_function_token(RTAS_FN_IBM_NMI_REGISTER); if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE) return; - ibm_nmi_interlock_token = rtas_token("ibm,nmi-interlock"); + ibm_nmi_interlock_token = rtas_function_token(RTAS_FN_IBM_NMI_INTERLOCK); if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE)) return; @@ -1071,14 +1071,14 @@ static void __init pseries_init(void) static void pseries_power_off(void) { int rc; - int rtas_poweroff_ups_token = rtas_token("ibm,power-off-ups"); + int rtas_poweroff_ups_token = rtas_function_token(RTAS_FN_IBM_POWER_OFF_UPS); if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_POWER_OFF); if (rtas_poweron_auto == 0 || rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { - rc = rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1); + rc = rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1); printk(KERN_INFO "RTAS power-off returned %d\n", rc); } else { rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 2bcfee86ff87..c597711ef20a 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -55,7 +55,7 @@ static cpumask_var_t of_spin_mask; int smp_query_cpu_stopped(unsigned int pcpu) { int cpu_status, status; - int qcss_tok = rtas_token("query-cpu-stopped-state"); + int qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE); if (qcss_tok == RTAS_UNKNOWN_SERVICE) { printk_once(KERN_INFO @@ -108,7 +108,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. */ - start_cpu = rtas_token("start-cpu"); + start_cpu = rtas_function_token(RTAS_FN_START_CPU); if (start_cpu == RTAS_UNKNOWN_SERVICE) return 1; @@ -266,7 +266,7 @@ void __init smp_init_pseries(void) * We know prom_init will not have started them if RTAS supports * query-cpu-stopped-state. 
*/ - if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) { + if (rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE) == RTAS_UNKNOWN_SERVICE) { if (cpu_has_feature(CPU_FTR_SMT)) { for_each_present_cpu(i) { if (cpu_thread_in_core(i) == 0) diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index f8320f8e5bc7..b772a833d9b7 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -200,10 +200,10 @@ static struct ics ics_rtas = { __init int ics_rtas_init(void) { - ibm_get_xive = rtas_token("ibm,get-xive"); - ibm_set_xive = rtas_token("ibm,set-xive"); - ibm_int_on = rtas_token("ibm,int-on"); - ibm_int_off = rtas_token("ibm,int-off"); + ibm_get_xive = rtas_function_token(RTAS_FN_IBM_GET_XIVE); + ibm_set_xive = rtas_function_token(RTAS_FN_IBM_SET_XIVE); + ibm_int_on = rtas_function_token(RTAS_FN_IBM_INT_ON); + ibm_int_off = rtas_function_token(RTAS_FN_IBM_INT_OFF); /* We enable the RTAS "ICS" if RTAS is present with the * appropriate tokens diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0da66bc4823d..73c620c2a3a1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -76,9 +76,6 @@ static cpumask_t xmon_batch_cpus = CPU_MASK_NONE; #define xmon_owner 0 #endif /* CONFIG_SMP */ -#ifdef CONFIG_PPC_PSERIES -static int set_indicator_token = RTAS_UNKNOWN_SERVICE; -#endif static unsigned long in_xmon __read_mostly = 0; static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT); static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE); @@ -398,6 +395,7 @@ static inline void disable_surveillance(void) #ifdef CONFIG_PPC_PSERIES /* Since this can't be a module, args should end up below 4GB. */ static struct rtas_args args; + const s32 token = rtas_function_token(RTAS_FN_SET_INDICATOR); /* * At this point we have got all the cpus we can into @@ -406,10 +404,10 @@ static inline void disable_surveillance(void) * If we did try to take rtas.lock there would be a * real possibility of deadlock. */ - if (set_indicator_token == RTAS_UNKNOWN_SERVICE) + if (token == RTAS_UNKNOWN_SERVICE) return; - rtas_call_unlocked(&args, set_indicator_token, 3, 1, NULL, + rtas_call_unlocked(&args, token, 3, 1, NULL, SURVEILLANCE_TOKEN, 0, 0); #endif /* CONFIG_PPC_PSERIES */ @@ -3976,14 +3974,6 @@ static void xmon_init(int enable) __debugger_iabr_match = xmon_iabr_match; __debugger_break_match = xmon_break_match; __debugger_fault_handler = xmon_fault_handler; - -#ifdef CONFIG_PPC_PSERIES - /* - * Get the token here to avoid trying to get a lock - * during the crash, causing a deadlock. - */ - set_indicator_token = rtas_token("set-indicator"); -#endif } else { __debugger = NULL; __debugger_ipi = NULL; -- cgit v1.2.3 From 4f11410bf6da87defe8fd59b0413f0d9f71744da Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 27 Jan 2023 08:57:42 -0500 Subject: selftests/powerpc: Fix incorrect kernel headers search path Use $(KHDR_INCLUDES) as lookup path for kernel headers. This prevents building against kernel headers from the build environment in scenarios where kernel headers are installed into a specific output directory (O=...). 
Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: Mathieu Desnoyers Acked-by: Shuah Khan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230127135755.79929-22-mathieu.desnoyers@efficios.com --- tools/testing/selftests/powerpc/ptrace/Makefile | 2 +- tools/testing/selftests/powerpc/security/Makefile | 2 +- tools/testing/selftests/powerpc/syscalls/Makefile | 2 +- tools/testing/selftests/powerpc/tm/Makefile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 2f02cb54224d..cbeeaeae8837 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -33,7 +33,7 @@ TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64)) $(TESTS_64): CFLAGS += -m64 $(TM_TESTS): CFLAGS += -I../tm -mhtm -CFLAGS += -I../../../../../usr/include -fno-pie +CFLAGS += $(KHDR_INCLUDES) -fno-pie $(OUTPUT)/ptrace-gpr: ptrace-gpr.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 7488315fd847..e0d979ab0204 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -5,7 +5,7 @@ TEST_PROGS := mitigation-patching.sh top_srcdir = ../../../../.. -CFLAGS += -I../../../../../usr/include +CFLAGS += $(KHDR_INCLUDES) include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index 54ff5cfffc63..ee1740ddfb0c 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only TEST_GEN_PROGS := ipc_unmuxed rtas_filter -CFLAGS += -I../../../../../usr/include +CFLAGS += $(KHDR_INCLUDES) top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 5881e97c73c1..3876805c2f31 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -17,7 +17,7 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c CFLAGS += -mhtm $(OUTPUT)/tm-syscall: tm-syscall-asm.S -$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include +$(OUTPUT)/tm-syscall: CFLAGS += $(KHDR_INCLUDES) $(OUTPUT)/tm-tmspr: CFLAGS += -pthread $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 $(OUTPUT)/tm-resched-dscr: ../pmu/lib.c -- cgit v1.2.3 From 9efcdaac36e1643a1b7f5337e6143ce142d381b1 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 9 Feb 2023 16:26:49 +0530 Subject: powerpc/eeh: Set channel state after notifying the drivers When a PCI error is encountered 6th time in an hour we set the channel state to perm_failure and notify the driver about the permanent failure. However, after upstream commit 38ddc011478e ("powerpc/eeh: Make permanently failed devices non-actionable"), EEH handler stops calling any routine once the device is marked as permanent failure. This issue can lead to fatal consequences like kernel hang with certain PCI devices. Following log is observed with lpfc driver, with and without this change, Without this change kernel hangs, If PCI error is encountered 6 times for a device in an hour. 
Without the change EEH: Beginning: 'error_detected(permanent failure)' PCI 0132:60:00.0#600000: EEH: not actionable (1,1,1) PCI 0132:60:00.1#600000: EEH: not actionable (1,1,1) EEH: Finished:'error_detected(permanent failure)' With the change EEH: Beginning: 'error_detected(permanent failure)' EEH: Invoking lpfc->error_detected(permanent failure) EEH: lpfc driver reports: 'disconnect' EEH: Invoking lpfc->error_detected(permanent failure) EEH: lpfc driver reports: 'disconnect' EEH: Finished:'error_detected(permanent failure)' To fix the issue, set channel state to permanent failure after notifying the drivers. Fixes: 38ddc011478e ("powerpc/eeh: Make permanently failed devices non-actionable") Suggested-by: Mahesh Salgaonkar Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230209105649.127707-1-ganeshgr@linux.ibm.com --- arch/powerpc/kernel/eeh_driver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index f279295179bd..438568a472d0 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -1065,10 +1065,10 @@ recover_failed: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_set_irq_state(pe, false); eeh_pe_report("error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); /* Mark the PE to be removed permanently */ eeh_pe_state_mark(pe, EEH_PE_REMOVED); @@ -1185,10 +1185,10 @@ void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_pe_report( "error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { -- cgit v1.2.3 From 77e82fa1f9781a958a6ea4aed7aec41239a5a22f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Dec 2022 19:45:58 +0100 Subject: powerpc/64: Replace -mcpu=e500mc64 by -mcpu=e5500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E500MC64 is a processor pre-dating E5500 that has never been commercialised. Use -mcpu=e5500 for E5500 core. 
More details at https://gcc.gnu.org/PR108149 Signed-off-by: Christophe Leroy Acked-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/fa71ed20d22c156225436374f0ab847daac893bc.1671475543.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/Kconfig.cputype | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3219e974ec95..046b571496b1 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -257,7 +257,7 @@ config TARGET_CPU default "power8" if POWER8_CPU default "power9" if POWER9_CPU default "power10" if POWER10_CPU - default "e500mc64" if E5500_CPU + default "e5500" if E5500_CPU default "e6500" if E6500_CPU default "power4" if POWERPC64_CPU && !CPU_LITTLE_ENDIAN default "power8" if POWERPC64_CPU && CPU_LITTLE_ENDIAN -- cgit v1.2.3 From 388defd5e4180a48e068d7ba9b024ce0ca957968 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 13 Feb 2023 13:23:51 -0600 Subject: powerpc/machdep: warn when machine_is() used too early machine_is() can't provide correct results before probe_machine() has run. Warn when it's used too early in boot, placing the WARN_ON() in a helper function so the reported file:line indicates exactly what went wrong. checkpatch complains about __attribute__((weak)) in the patch, so change that to __weak, and align the line continuations as well. Signed-off-by: Nathan Lynch Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230210-warn-on-machine-is-before-probe-machine-v2-1-b57f8243c51c@linux.ibm.com --- arch/powerpc/include/asm/machdep.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 378b8d5836a7..459736d5e511 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_MACHDEP_H #ifdef __KERNEL__ +#include #include #include #include @@ -220,11 +221,16 @@ extern struct machdep_calls *machine_id; EXPORT_SYMBOL(mach_##name); \ struct machdep_calls mach_##name __machine_desc = -#define machine_is(name) \ - ({ \ - extern struct machdep_calls mach_##name \ - __attribute__((weak)); \ - machine_id == &mach_##name; \ +static inline bool __machine_is(const struct machdep_calls *md) +{ + WARN_ON(!machine_id); // complain if used before probe_machine() + return machine_id == md; +} + +#define machine_is(name) \ + ({ \ + extern struct machdep_calls mach_##name __weak; \ + __machine_is(&mach_##name); \ }) static inline void log_error(char *buf, unsigned int err_type, int fatal) -- cgit v1.2.3 From b0ae5b6f3c298a005b73556740526c0e24a5633c Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Fri, 16 Dec 2022 17:57:08 +0530 Subject: powerpc/kexec_file: print error string on usable memory property update failure Print the FDT error description along with the error message if failed to set the "linux,drconf-usable-memory" property in the kdump kernel's FDT. 
Signed-off-by: Sourabh Jain Acked-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221216122708.182154-1-sourabhjain@linux.ibm.com --- arch/powerpc/kexec/file_load_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index ab80c492da31..110d28bede2a 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -690,7 +690,8 @@ static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem) ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory", um_info.buf, (um_info.idx * sizeof(u64))); if (ret) { - pr_err("Failed to update fdt with linux,drconf-usable-memory property"); + pr_err("Failed to update fdt with linux,drconf-usable-memory property: %s", + fdt_strerror(ret)); goto out; } } -- cgit v1.2.3 From 748ea32d2dbd813d3bd958117bde5191182f909a Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 15 Feb 2023 10:12:12 -0700 Subject: macintosh: windfarm: Use unsigned type for 1-bit bitfields Clang warns: drivers/macintosh/windfarm_lm75_sensor.c:63:14: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion] lm->inited = 1; ^ ~ drivers/macintosh/windfarm_smu_sensors.c:356:19: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion] pow->fake_volts = 1; ^ ~ drivers/macintosh/windfarm_smu_sensors.c:368:18: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion] pow->quadratic = 1; ^ ~ There is no bug here since no code checks the actual value of these fields, just whether or not they are zero (boolean context), but this can be easily fixed by switching to an unsigned type. 
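For background, the truncation is easy to see in a minimal standalone sketch (made-up names, not the driver code); with gcc and clang on Linux targets a plain 'int' bit-field is signed, so a one-bit field can only hold 0 or -1:

  #include <stdio.h>

  struct flags {
          int signed_bit : 1;             /* signed: value range is -1..0 */
          unsigned int unsigned_bit : 1;  /* unsigned: value range is 0..1 */
  };

  int main(void)
  {
          struct flags f;

          f.signed_bit = 1;       /* truncated to -1; clang warns here */
          f.unsigned_bit = 1;     /* stores 1 exactly as written */

          /* Both still test as "true", which is why behaviour is unchanged. */
          printf("signed=%d unsigned=%d\n", f.signed_bit, f.unsigned_bit);
          return 0;
  }

This prints "signed=-1 unsigned=1", which matches the boolean-only use in the driver: the warning is about the surprising stored value, not about any functional change.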
Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230215-windfarm-wsingle-bit-bitfield-constant-conversion-v1-1-26415072e855@kernel.org --- drivers/macintosh/windfarm_lm75_sensor.c | 4 ++-- drivers/macintosh/windfarm_smu_sensors.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index 24f0a444d312..9c6febce2376 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -33,8 +33,8 @@ #endif struct wf_lm75_sensor { - int ds1775 : 1; - int inited : 1; + unsigned int ds1775 : 1; + unsigned int inited : 1; struct i2c_client *i2c; struct wf_sensor sens; }; diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c index 00c6fe25fcba..2bdb73b34d29 100644 --- a/drivers/macintosh/windfarm_smu_sensors.c +++ b/drivers/macintosh/windfarm_smu_sensors.c @@ -274,8 +274,8 @@ struct smu_cpu_power_sensor { struct list_head link; struct wf_sensor *volts; struct wf_sensor *amps; - int fake_volts : 1; - int quadratic : 1; + unsigned int fake_volts : 1; + unsigned int quadratic : 1; struct wf_sensor sens; }; #define to_smu_cpu_power(c) container_of(c, struct smu_cpu_power_sensor, sens) -- cgit v1.2.3 From 7096deb7b5387e7899655213b7430ab043ddda4f Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 16 Feb 2023 18:09:03 +1100 Subject: powerpc/pseries: Fix endianness issue when parsing PLPKS secvar flags When a user updates a variable through the PLPKS secvar interface, we take the first 8 bytes of the data written to the update attribute to pass through to the H_PKS_SIGNED_UPDATE hcall as flags. These bytes are always written in big-endian format. Currently, the flags bytes are memcpy()ed into a u64, which is then loaded into a register to pass as part of the hcall. This means that on LE systems, the bytes are in the wrong order. Use be64_to_cpup() instead, to ensure the flags bytes are byteswapped if necessary. Reported-by: Stefan Berger Fixes: ccadf154cb00 ("powerpc/pseries: Implement secvars for dynamic secure boot") Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230216070903.355091-1-ajd@linux.ibm.com --- arch/powerpc/platforms/pseries/plpks-secvar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/plpks-secvar.c b/arch/powerpc/platforms/pseries/plpks-secvar.c index f6c8888f4076..257fd1f8bc19 100644 --- a/arch/powerpc/platforms/pseries/plpks-secvar.c +++ b/arch/powerpc/platforms/pseries/plpks-secvar.c @@ -135,7 +135,8 @@ static int plpks_set_variable(const char *key, u64 key_len, u8 *data, goto err; var.namelen = rc * 2; - memcpy(&flags, data, sizeof(flags)); + // Flags are contained in the first 8 bytes of the buffer, and are always big-endian + flags = be64_to_cpup((__be64 *)data); var.datalen = data_size - sizeof(flags); var.data = data + sizeof(flags); -- cgit v1.2.3 From 91360b446a5cced537d61fc2394253e8c6b9ae7b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 13 Feb 2023 22:23:22 +1100 Subject: powerpc/nohash: Fix build error with binutils >= 2.38 With bintils >= 2.38 the ppc64_book3e_allmodconfig build fails: {standard input}: Assembler messages: {standard input}:196: Error: unrecognized opcode: `lbarx' {standard input}:196: Error: unrecognized opcode: `stbcx.' 
make[5]: *** [scripts/Makefile.build:252: arch/powerpc/mm/nohash/e500_hugetlbpage.o] Error 1 That happens because the default CPU for that config is e5500, set via CONFIG_TARGET_CPU, and so the assembler is building for e5500, which doesn't support those instructions. Fix it by using machine directives to tell the assembler to assemble the relevant code for e6500, which does support lbarx/stbcx. That is safe because the code already has the CPU_FTR_SMT check, which ensures the lbarx sequence doesn't run on e5500, which doesn't support SMT. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230213112322.998003-1-mpe@ellerman.id.au --- arch/powerpc/mm/nohash/e500_hugetlbpage.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/e500_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c index c7d4b317a823..58c8d9849cb1 100644 --- a/arch/powerpc/mm/nohash/e500_hugetlbpage.c +++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c @@ -45,7 +45,9 @@ static inline void book3e_tlb_lock(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - asm volatile("1: lbarx %0, 0, %1;" + asm volatile(".machine push;" + ".machine e6500;" + "1: lbarx %0, 0, %1;" "cmpwi %0, 0;" "bne 2f;" "stbcx. %2, 0, %1;" @@ -56,6 +58,7 @@ static inline void book3e_tlb_lock(void) "bne 2b;" "b 1b;" "3:" + ".machine pop;" : "=&r" (tmp) : "r" (&paca->tcd_ptr->lock), "r" (token) : "memory"); -- cgit v1.2.3 From a7caf3f181f160ae13ece0124e1c268d22263708 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 16 Feb 2023 22:29:15 +1100 Subject: powerpc/nohash: Fix build with llvm-as When using the LLVM integrated assembler (llvm-as), the book3e build fails with: arch/powerpc/mm/nohash/tlb_low_64e.S:354:2: error: invalid instruction tlbilxva 0,%r15 ^ tlbilxva is an extended mnemonic for tlbilx, but llvm-as also doesn't support tlbilx, despite it being an e500mc instruction. Fix it by using the existing PPC_TLBILX_VA macro. The resulting binary is identical when building with binutils. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230216112915.1681631-1-mpe@ellerman.id.au --- arch/powerpc/mm/nohash/tlb_low_64e.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 76cf456d7976..7e0b8fe1c279 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -351,7 +351,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) mfspr r15,SPRN_MAS2 isync - tlbilxva 0,r15 + PPC_TLBILX_VA(0,R15) isync mtspr SPRN_MAS6,r10 -- cgit v1.2.3 From d78c8e32890ef7eca79ffd67c96022c7f9d8cce4 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 10 Aug 2022 13:43:18 +0200 Subject: powerpc/mm: Rearrange if-else block to avoid clang warning Clang warns: arch/powerpc/mm/book3s64/radix_tlb.c:1191:23: error: variable 'hstart' is uninitialized when used here __tlbiel_va_range(hstart, hend, pid, ^~~~~~ arch/powerpc/mm/book3s64/radix_tlb.c:1191:31: error: variable 'hend' is uninitialized when used here __tlbiel_va_range(hstart, hend, pid, ^~~~ Rework the 'if (IS_ENABLE(CONFIG_TRANSPARENT_HUGEPAGE))' so hstart/hend is always initialized to silence the warnings. That will also simplify the 'else' path. Clang is getting confused with these warnings, but the warnings is a false-positive. 
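As a simplified illustration of the rework (made-up function and flag names, not the kernel code), the warning goes away because the bounds are now computed on every path and only the decision flag depends on the config option:

  #include <stdbool.h>

  #define PMD_SHIFT       21                      /* illustrative: 2MB huge pages */
  #define PMD_SIZE        (1UL << PMD_SHIFT)
  #define PMD_MASK        (~(PMD_SIZE - 1))

  void flush_range(unsigned long start, unsigned long end, bool thp_enabled)
  {
          unsigned long hstart, hend;
          bool hflush;

          /* Compute the PMD-aligned bounds unconditionally... */
          hstart = (start + PMD_SIZE - 1) & PMD_MASK;
          hend = end & PMD_MASK;
          /* ...and fold the config check into the flag itself. */
          hflush = thp_enabled && hstart < hend;

          if (hflush) {
                  /* the huge-page range [hstart, hend) would be flushed here */
          }
  }

With the old structure the assignments sat inside the if (IS_ENABLED(...)) block, so the compiler could not prove hstart/hend were initialized at their later use, even though hflush guarded that use.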
Suggested-by: Arnd Bergmann Suggested-by: Nathan Chancellor Reviewed-by: Christophe Leroy Reviewed-by: Nathan Chancellor Signed-off-by: Anders Roxell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220810114318.3220630-1-anders.roxell@linaro.org --- arch/powerpc/mm/book3s64/radix_tlb.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 50b70105364f..e50bc5fc7ddf 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -1184,15 +1184,12 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, } } } else { - bool hflush = false; + bool hflush; unsigned long hstart, hend; - if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - hstart = (start + PMD_SIZE - 1) & PMD_MASK; - hend = end & PMD_MASK; - if (hstart < hend) - hflush = true; - } + hstart = (start + PMD_SIZE - 1) & PMD_MASK; + hend = end & PMD_MASK; + hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend; if (type == FLUSH_TYPE_LOCAL) { asm volatile("ptesync": : :"memory"); -- cgit v1.2.3 From bfb03af71a3798b5a88a945a9c19ad67e1c4986d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Dec 2022 19:45:57 +0100 Subject: powerpc: Pass correct CPU reference to assembler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jan-Benedict reported issue with building ppc64e_defconfig with mainline GCC work: powerpc64-linux-gcc -Wp,-MMD,arch/powerpc/kernel/vdso/.gettimeofday-64.o.d -nostdinc -I./arch/powerpc/include -I./arch/powerpc/include/generated -I./include -I./arch/powerpc/include/uapi -I./arch/powerpc/include/generated/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/compiler-version.h -include ./include/linux/kconfig.h -D__KERNEL__ -I ./arch/powerpc -DHAVE_AS_ATHIGH=1 -fmacro-prefix-map=./= -D__ASSEMBLY__ -fno-PIE -m64 -Wl,-a64 -mabi=elfv1 -Wa,-me500 -Wa,-me500mc -mabi=elfv1 -mbig-endian -Wl,-soname=linux-vdso64.so.1 -D__VDSO64__ -s -c -o arch/powerpc/kernel/vdso/gettimeofday-64.o arch/powerpc/kernel/vdso/gettimeofday.S arch/powerpc/kernel/vdso/gettimeofday.S: Assembler messages: arch/powerpc/kernel/vdso/gettimeofday.S:72: Error: unrecognized opcode: `stdu' arch/powerpc/kernel/vdso/gettimeofday.S:72: Error: unrecognized opcode: `stdu' arch/powerpc/kernel/vdso/gettimeofday.S:72: Error: unrecognized opcode: `std' arch/powerpc/kernel/vdso/gettimeofday.S:72: Error: unrecognized opcode: `std' arch/powerpc/kernel/vdso/gettimeofday.S:72: Error: unrecognized opcode: `ld' arch/powerpc/kernel/vdso/gettimeofday.S:72: Error: unrecognized opcode: `ld' ... make[1]: *** [arch/powerpc/kernel/vdso/Makefile:76: arch/powerpc/kernel/vdso/gettimeofday-64.o] Error 1 make: *** [arch/powerpc/Makefile:387: vdso_prepare] Error 2 This is due to assembler being called with -me500mc which is a 32 bits target. The problem comes from the fact that CONFIG_PPC_E500MC is selected for both the e500mc (32 bits) and the e5500 (64 bits), and therefore the following makefile rule is wrong: cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) Today we have CONFIG_TARGET_CPU which provides the identification of the expected CPU, it is used for GCC. Once GCC knows the target CPU, it adds the correct CPU option to assembler, no need to add it explicitly. 
With that change (And also commit 45f7091aac35 ("powerpc/64: Set default CPU in Kconfig")), it now is: powerpc64-linux-gcc -Wp,-MMD,arch/powerpc/kernel/vdso/.gettimeofday-64.o.d -nostdinc -I./arch/powerpc/include -I./arch/powerpc/include/generated -I./include -I./arch/powerpc/include/uapi -I./arch/powerpc/include/generated/uapi -I./include/uapi -I./include/generated/uapi -include ./include/linux/compiler-version.h -include ./include/linux/kconfig.h -D__KERNEL__ -I ./arch/powerpc -DHAVE_AS_ATHIGH=1 -fmacro-prefix-map=./= -D__ASSEMBLY__ -fno-PIE -m64 -Wl,-a64 -mabi=elfv1 -mcpu=e500mc64 -mabi=elfv1 -mbig-endian -Wl,-soname=linux-vdso64.so.1 -D__VDSO64__ -s -c -o arch/powerpc/kernel/vdso/gettimeofday-64.o arch/powerpc/kernel/vdso/gettimeofday.S Reported-by: Jan-Benedict Glaw Signed-off-by: Christophe Leroy Acked-by: Pali Rohár [mpe: Retain -Wa,-mpower4 -Wa,-many for Book3S 64 builds for now] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/758ad54128fa9dd2fdedc4c511592111cbded900.1671475543.git.christophe.leroy@csgroup.eu --- arch/powerpc/Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index bf5f0a998273..5622e44f534c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -201,10 +201,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -cpu-as-$(CONFIG_40x) += -Wa,-m405 -cpu-as-$(CONFIG_44x) += -Wa,-m440 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) -cpu-as-$(CONFIG_PPC_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 # mnemonic and failing that it will allow any valid mnemonic that GAS knows @@ -212,7 +209,6 @@ cpu-as-$(CONFIG_PPC_E500) += -Wa,-me500 # LLVM IAS doesn't understand either flag: https://github.com/ClangBuiltLinux/linux/issues/675 # but LLVM IAS only supports ISA >= 2.06 for Book3S 64 anyway... cpu-as-$(CONFIG_PPC_BOOK3S_64) += $(call as-option,-Wa$(comma)-mpower4) $(call as-option,-Wa$(comma)-many) -cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) KBUILD_AFLAGS += $(cpu-as-y) KBUILD_CFLAGS += $(cpu-as-y) -- cgit v1.2.3 From 3c2ce4912a6f44dfb11bc5d241b13e9f5d79078b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 19 Dec 2022 19:46:00 +0100 Subject: powerpc/epapr: Don't use wrteei on non booke wrteei is only for booke. Use the standard mfmsr/ori/mtmsr when non booke. 
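For reference, the generic sequence is just a read-modify-write of the MSR; a rough C rendering with inline assembly (illustrative only — the actual change is in epapr_hcalls.S, and 64-bit Book3S code would normally use mtmsrd or the irq helpers instead):

  #define MSR_EE  0x8000UL        /* external interrupt enable */

  /* Rough equivalent of "wrteei 1" for cores without that instruction. */
  static inline void msr_enable_ee(void)
  {
          unsigned long msr;

          asm volatile("mfmsr %0" : "=r" (msr));
          msr |= MSR_EE;
          asm volatile("mtmsr %0" : : "r" (msr) : "memory");
  }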
Reported-by: Jan-Benedict Glaw Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b29c7f1727433b003eae050e44072741c8ac223b.1671475543.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/epapr_hcalls.S | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 69a912550577..033116e465d0 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -21,7 +21,13 @@ _GLOBAL(epapr_ev_idle) ori r4, r4,_TLF_NAPPING /* so when we take an exception */ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */ +#ifdef CONFIG_BOOKE_OR_40x wrteei 1 +#else + mfmsr r4 + ori r4, r4, MSR_EE + mtmsr r4 +#endif idle_loop: LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) -- cgit v1.2.3 From 38d73b671a817328334f2a70a23fed4d1f4a952c Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Fri, 17 Feb 2023 10:02:26 +0530 Subject: powerpc/64: Fix unannotated intra-function call warning objtool throws the following warning: arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x6128: unannotated intra-function call Fix the warning by annotating start_initialization_book3s symbol with the SYM_FUNC_START_LOCAL and SYM_FUNC_END macros. Reported-by: Stephen Rothwell Signed-off-by: Sathvika Vasireddy Fixes: 58f24eea5278 ("powerpc/64s: Refactor initialisation after prom") Suggested-by: Josh Poimboeuf Tested-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230217043226.1020041-1-sv@linux.ibm.com --- arch/powerpc/kernel/head_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3a7266fa8a18..1febb56ebaeb 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -472,7 +472,7 @@ SYM_FUNC_START_LOCAL(__mmu_off) b . /* prevent speculative execution */ SYM_FUNC_END(__mmu_off) -start_initialization_book3s: +SYM_FUNC_START_LOCAL(start_initialization_book3s) mflr r25 /* Setup some critical 970 SPRs before switching MMU off */ @@ -494,6 +494,7 @@ start_initialization_book3s: mtlr r25 blr +SYM_FUNC_END(start_initialization_book3s) #endif /* -- cgit v1.2.3 From 6f8675a6b06dabe62015c6747df5e957cd159338 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 18 Feb 2023 10:28:50 +0100 Subject: powerpc/e500: Add missing prototype for 'relocate_init' Kernel test robot reports: arch/powerpc/mm/nohash/e500.c:314:21: warning: no previous prototype for 'relocate_init' [-Wmissing-prototypes] 314 | notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start) | ^~~~~~~~~~~~~ Add it in mm/mmu_decl.h, close to associated is_second_reloc variable declaration. 
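The warning class is straightforward to reproduce outside the kernel; a small made-up example of what -Wmissing-prototypes means and why a declaration in a shared header is the fix:

  /* decl.h — shared header, included by the definition and by any callers */
  void do_setup(unsigned long addr);

  /* setup.c — if the declaration above is not visible here, building with
   * -Wmissing-prototypes produces:
   *   warning: no previous prototype for 'do_setup' [-Wmissing-prototypes]
   */
  #include "decl.h"

  void do_setup(unsigned long addr)
  {
          (void)addr;     /* body elided */
  }

Putting the prototype next to is_second_reloc in mmu_decl.h follows the same idea: the definition in nohash/e500.c then sees a previous prototype and the warning goes away.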
Reported-by: kernel test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/oe-kbuild-all/202302181136.wgyCKUcs-lkp@intel.com/ Link: https://lore.kernel.org/r/ac9107acf24135e1a07e8f84d2090572d43e3fe4.1676712510.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mmu_decl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index bd9784f77f2e..c6dccb4f06dc 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -120,6 +120,7 @@ extern int switch_to_as1(void); extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu); void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys); void reloc_kernel_entry(void *fdt, int addr); +void relocate_init(u64 dt_ptr, phys_addr_t start); extern int is_second_reloc; #endif extern void loadcam_entry(unsigned int index); -- cgit v1.2.3 From bec46462567032afb05017cb10c545aab0810a73 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Mon, 20 Feb 2023 09:04:35 +0100 Subject: powerpc: dts: turris1x.dts: Set lower priority for CPLD syscon-reboot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to CPLD firmware bugs, set CPLD syscon-reboot priority level to 64 (between rstcr and watchdog) to ensure that rstcr's global-utilities reset method which is preferred stay as default one, and to ensure that CPLD syscon-reboot is more preferred than watchdog reset method. Fixes: 0531a4abd1c6 ("powerpc: dts: turris1x.dts: Add CPLD reboot node") Depends-on: e6333293f27c ("power: reset: syscon-reboot: Add support for specifying priority") Signed-off-by: Pali Rohár Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230220080435.4237-1-pali@kernel.org --- arch/powerpc/boot/dts/turris1x.dts | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts index e9cda34a140e..c9b619f6ed5c 100644 --- a/arch/powerpc/boot/dts/turris1x.dts +++ b/arch/powerpc/boot/dts/turris1x.dts @@ -367,11 +367,34 @@ }; reboot@d { + /* + * CPLD firmware which manages system reset and + * watchdog registers has bugs. It does not + * autoclear system reset register after change + * and watchdog ignores reset line on immediate + * succeeding reset cycle triggered by watchdog. + * These bugs have to be workarounded in U-Boot + * bootloader. So use system reset via syscon as + * a last resort because older U-Boot versions + * do not have workaround for watchdog. + * + * Reset method via rstcr's global-utilities + * (the preferred one) has priority level 128, + * watchdog has priority level 0 and default + * syscon-reboot priority level is 192. + * + * So define syscon-reboot with custom priority + * level 64 (between rstcr and watchdog) because + * rstcr should stay as default preferred reset + * method and reset via watchdog is more broken + * than system reset via syscon. + */ compatible = "syscon-reboot"; reg = <0x0d 0x01>; offset = <0x0d>; mask = <0x01>; value = <0x01>; + priority = <64>; }; led-controller@13 { -- cgit v1.2.3 From f82cdc37c4bd4ba905bf99ade9782a639b5c12e9 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 22 Feb 2023 13:17:08 +1100 Subject: powerpc/pseries: Avoid hcall in plpks_is_available() on non-pseries plpks_is_available() can be called on any platform via kexec but calls _plpks_get_config() which makes a hcall, which will only work on pseries. 
Fix this by returning early in plpks_is_available() if hcalls aren't possible. Fixes: 119da30d037d ("powerpc/pseries: Expose PLPKS config values, support additional fields") Reported-by: Murphy Zhou Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230222021708.146257-1-ruscur@russell.cc --- arch/powerpc/platforms/pseries/plpks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index cdf09e5bd741..6f7bf3fc3aea 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -22,6 +22,7 @@ #include #include #include +#include static u8 *ospassword; static u16 ospasswordlength; @@ -377,6 +378,9 @@ bool plpks_is_available(void) { int rc; + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return false; + rc = _plpks_get_config(); if (rc) return false; -- cgit v1.2.3