From 04c40eed3f7ac48ddaf20104489510e743a53c47 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 8 Nov 2023 13:58:38 +0100
Subject: powerpc/ps3: move udbg_shutdown_ps3gelic prototype

Allmodconfig kernels produce a missing-prototypes warning:

  arch/powerpc/platforms/ps3/gelic_udbg.c:239:6: error: no previous prototype for 'udbg_shutdown_ps3gelic' [-Werror=missing-prototypes]

Move the declaration from a local header to asm/ps3.h where it can be
seen from both the caller and the definition.

Signed-off-by: Arnd Bergmann
Signed-off-by: Geoff Levand
Acked-by: Jakub Kicinski
[mpe: Drop CONFIG_PS3GELIC_UDBG to fix build error]
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231108125843.3806765-18-arnd@kernel.org
---
 arch/powerpc/Kconfig.debug              |  1 -
 arch/powerpc/include/asm/ps3.h          |  6 ++++++
 arch/powerpc/platforms/ps3/Kconfig      | 12 ------------
 arch/powerpc/platforms/ps3/Makefile     |  2 +-
 arch/powerpc/platforms/ps3/gelic_udbg.c |  1 +
 5 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index ea4033abc07d..8c80b154e814 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -271,7 +271,6 @@ config PPC_EARLY_DEBUG_USBGECKO
 config PPC_EARLY_DEBUG_PS3GELIC
 	bool "Early debugging through the PS3 Ethernet port"
 	depends on PPC_PS3
-	select PS3GELIC_UDBG
 	help
 	  Select this to enable early debugging for the PlayStation3 via
 	  UDP broadcasts sent out through the Ethernet port.

diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index a5f36546a052..d13d8fdc3411 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -514,4 +514,10 @@ u64 ps3_get_spe_id(void *arg);

 void ps3_early_mm_init(void);

+#ifdef CONFIG_PPC_EARLY_DEBUG_PS3GELIC
+void udbg_shutdown_ps3gelic(void);
+#else
+static inline void udbg_shutdown_ps3gelic(void) {}
+#endif
+
 #endif

diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index a44869e5ea70..e9c1087dd42e 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -167,16 +167,4 @@ config PS3_LPM
 	  profiling support of the Cell processor with programs like
 	  perfmon2, then say Y or M, otherwise say N.

-config PS3GELIC_UDBG
-	bool "PS3 udbg output via UDP broadcasts on Ethernet"
-	depends on PPC_PS3
-	help
-	  Enables udbg early debugging output by sending broadcast UDP
-	  via the Ethernet port (UDP port number 18194).
-
-	  This driver uses a trivial implementation and is independent
-	  from the main PS3 gelic network driver.
-
-	  If in doubt, say N here.
-
 endmenu

diff --git a/arch/powerpc/platforms/ps3/Makefile b/arch/powerpc/platforms/ps3/Makefile
index 86bf2967a8d4..bc79bb124d1e 100644
--- a/arch/powerpc/platforms/ps3/Makefile
+++ b/arch/powerpc/platforms/ps3/Makefile
@@ -3,7 +3,7 @@ obj-y += setup.o mm.o time.o hvcall.o htab.o repository.o
 obj-y += interrupt.o exports.o os-area.o
 obj-y += system-bus.o

-obj-$(CONFIG_PS3GELIC_UDBG) += gelic_udbg.o
+obj-$(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) += gelic_udbg.o
 obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_SPU_BASE) += spu.o
 obj-y += device-init.o

diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c
index 6b298010fd84..a5202c18c236 100644
--- a/arch/powerpc/platforms/ps3/gelic_udbg.c
+++ b/arch/powerpc/platforms/ps3/gelic_udbg.c
@@ -14,6 +14,7 @@
 #include <linux/ip.h>
 #include <linux/udp.h>

+#include <asm/ps3.h>
 #include <asm/io.h>
 #include <asm/udbg.h>
 #include <asm/lv1call.h>
--
cgit v1.2.3

From 0c9a768de64d24e38e27652b8c273725ccc31916 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 8 Nov 2023 13:58:39 +0100
Subject: powerpc/pasemi: mark pas_shutdown() static

Allmodconfig builds show a warning about one function that is
accidentally marked global:

  arch/powerpc/platforms/pasemi/setup.c:67:6: error: no previous prototype for 'pas_shutdown' [-Werror=missing-prototypes]

Fixes: 656fdf3ad8e0 ("powerpc/pasemi: Add Nemo board device init code.")
Signed-off-by: Arnd Bergmann
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231108125843.3806765-19-arnd@kernel.org
---
 arch/powerpc/platforms/pasemi/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
index ef985ba2bf21..0761d98e5be3 100644
--- a/arch/powerpc/platforms/pasemi/setup.c
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -64,7 +64,7 @@ static void __noreturn pas_restart(char *cmd)
 }

 #ifdef CONFIG_PPC_PASEMI_NEMO
-void pas_shutdown(void)
+static void pas_shutdown(void)
 {
 	/* Set the PLD bit that makes the SB600 think the power button is being pressed */
 	void __iomem *pld_map = ioremap(0xf5000000,4096);
--
cgit v1.2.3

From afb36ac386783d2ef2ed839293c03fd06f470be0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 8 Nov 2023 13:58:40 +0100
Subject: powerpc/powermac: mark smp_psurge_{give,take}_timebase static

These functions are only called locally and should be static like the
other corresponding functions are:

  arch/powerpc/platforms/powermac/smp.c:416:13: error: no previous prototype for 'smp_psurge_take_timebase' [-Werror=missing-prototypes]
    416 | void __init smp_psurge_take_timebase(void)
        |             ^~~~~~~~~~~~~~~~~~~~~~~~
  arch/powerpc/platforms/powermac/smp.c:432:13: error: no previous prototype for 'smp_psurge_give_timebase' [-Werror=missing-prototypes]
    432 | void __init smp_psurge_give_timebase(void)
        |             ^~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Arnd Bergmann
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231108125843.3806765-20-arnd@kernel.org
---
 arch/powerpc/platforms/powermac/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index c83d1e14077e..15644be31990 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -413,7 +413,7 @@ static void __init smp_psurge_setup_cpu(int cpu_nr)
 		printk(KERN_ERR "Couldn't get primary IPI interrupt");
 }

-void __init smp_psurge_take_timebase(void)
+static void __init smp_psurge_take_timebase(void)
 {
 	if (psurge_type != PSURGE_DUAL)
 		return;
@@ -429,7 +429,7 @@ void __init smp_psurge_take_timebase(void)
 	set_dec(tb_ticks_per_jiffy/2);
 }

-void __init smp_psurge_give_timebase(void)
+static void __init smp_psurge_give_timebase(void)
 {
 	/* Nothing to do here */
 }
--
cgit v1.2.3

From 981d1c997fbc5e193b282f3a325a0230bf697363 Mon Sep 17 00:00:00 2001
From: Nathan Lynch
Date: Mon, 6 Nov 2023 07:42:55 -0600
Subject: powerpc/rtas: Drop declaration of undefined call_rtas() function

The call_rtas() function has never been a part of arch/powerpc, and its
implementation was removed from arch/ppc by 0a26b1364f14 ("ppc: Remove
CHRP, POWER3 and POWER4 support from arch/ppc").

Signed-off-by: Nathan Lynch
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231106-rtas-trivial-v1-3-61847655c51f@linux.ibm.com
---
 arch/powerpc/include/asm/rtas.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c697c3c74694..3bf7f0a4b07e 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -542,8 +542,6 @@ static inline void pSeries_coalesce_init(void) { }
 static inline void rtas_initialize(void) { }
 #endif

-extern int call_rtas(const char *, int, int, unsigned long *, ...);
-
 #ifdef CONFIG_HV_PERF_CTRS
 void read_24x7_sys_info(void);
 #else
--
cgit v1.2.3

From 1d8faf1f41b550eb7ab7ac841ebd70f205840dde Mon Sep 17 00:00:00 2001
From: Nathan Lynch
Date: Mon, 6 Nov 2023 07:42:56 -0600
Subject: powerpc/rtas: Remove unused rtas_service_present()

rtas_service_present() has no more users. rtas_function_implemented()
is now the appropriate API for determining whether a given RTAS
function is available to call.

Signed-off-by: Nathan Lynch
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231106-rtas-trivial-v1-4-61847655c51f@linux.ibm.com
---
 arch/powerpc/include/asm/rtas.h | 1 -
 arch/powerpc/kernel/rtas.c      | 5 -----
 2 files changed, 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3bf7f0a4b07e..c6568a647cd0 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -409,7 +409,6 @@ static inline bool rtas_function_implemented(const rtas_fn_handle_t handle)
 	return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
 extern int rtas_token(const char *service);
-extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
 			int nret, ...);

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..b5b340a91157 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -900,11 +900,6 @@ int rtas_token(const char *service)
 }
 EXPORT_SYMBOL_GPL(rtas_token);

-int rtas_service_present(const char *service)
-{
-	return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
-}
-
 #ifdef CONFIG_RTAS_ERROR_LOGGING

 static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;
--
cgit v1.2.3

From 010862d235c9fab4f0f9dd169efc72df94110758 Mon Sep 17 00:00:00 2001
From: Nathan Lynch
Date: Mon, 6 Nov 2023 07:42:57 -0600
Subject: powerpc/rtas: Move post_mobility_fixup() declaration to pseries

This is a pseries-specific function declaration that doesn't belong in
rtas.h. Move it to the pseries platform code and adjust
pseries/suspend.c accordingly.
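
[ed: illustration only, not part of the patch. The pattern being applied
is that a platform-private function gets its prototype from the
platform's own header, which both the definition and the callers
include. A minimal sketch in C, file roles as in this patch:]

	/* pseries.h (platform-private header) */
	void post_mobility_fixup(void);

	/* mobility.c -- the definition sees its own prototype */
	#include "pseries.h"
	void post_mobility_fixup(void) { /* ... */ }

	/* suspend.c -- the caller gets the same prototype */
	#include "pseries.h"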
Signed-off-by: Nathan Lynch
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231106-rtas-trivial-v1-5-61847655c51f@linux.ibm.com
---
 arch/powerpc/include/asm/rtas.h          | 1 -
 arch/powerpc/platforms/pseries/pseries.h | 1 +
 arch/powerpc/platforms/pseries/suspend.c | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c6568a647cd0..2365668fc13e 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -444,7 +444,6 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 #ifdef CONFIG_PPC_PSERIES
 extern time64_t last_rtas_event;
 extern int clobbering_unread_rtas_event(void);
-extern void post_mobility_fixup(void);
 int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
 #else
 static inline int clobbering_unread_rtas_event(void) { return 0; }

diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 8376f03f932a..bba4ad192b0f 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -55,6 +55,7 @@ extern int dlpar_detach_node(struct device_node *);
 extern int dlpar_acquire_drc(u32 drc_index);
 extern int dlpar_release_drc(u32 drc_index);
 extern int dlpar_unisolate_drc(u32 drc_index);
+extern void post_mobility_fixup(void);

 void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
 int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);

diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 5c43435472cc..382003dfdb9a 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include "pseries.h"

 static struct device suspend_dev;
--
cgit v1.2.3

From 19773eda86e289526b7f08fa56c92e75cd7796f6 Mon Sep 17 00:00:00 2001
From: Nathan Lynch
Date: Mon, 6 Nov 2023 07:42:58 -0600
Subject: powerpc/rtas: Remove trailing space

Use scripts/cleanfile to remove instances of trailing space in the core
RTAS code and header.

Signed-off-by: Nathan Lynch
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231106-rtas-trivial-v1-6-61847655c51f@linux.ibm.com
---
 arch/powerpc/include/asm/rtas.h |  6 +++---
 arch/powerpc/kernel/rtas.c      | 18 +++++++++---------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 2365668fc13e..1bed6be8ada3 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -268,7 +268,7 @@ typedef struct {
 #define RTAS_TYPE_DEALLOC	0xE3
 #define RTAS_TYPE_DUMP		0xE4
 #define RTAS_TYPE_HOTPLUG	0xE5
-/* I don't add PowerMGM events right now, this is a different topic */ 
+/* I don't add PowerMGM events right now, this is a different topic */
 #define RTAS_TYPE_PMGM_POWER_SW_ON	0x60
 #define RTAS_TYPE_PMGM_POWER_SW_OFF	0x61
 #define RTAS_TYPE_PMGM_LID_OPEN		0x62
@@ -461,7 +461,7 @@ static inline void rtas_cancel_event_scan(void) { }

 /* Error types logged.
  */
 #define ERR_FLAG_ALREADY_LOGGED	0x0
-#define ERR_FLAG_BOOT		0x1 /* log was pulled from NVRAM on boot */ 
+#define ERR_FLAG_BOOT		0x1 /* log was pulled from NVRAM on boot */
 #define ERR_TYPE_RTAS_LOG	0x2 /* from rtas event-scan */
 #define ERR_TYPE_KERNEL_PANIC	0x4 /* from die()/panic() */
 #define ERR_TYPE_KERNEL_PANIC_GZ 0x8 /* ditto, compressed */
@@ -471,7 +471,7 @@ static inline void rtas_cancel_event_scan(void) { }
 	(ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ)

 #define RTAS_DEBUG KERN_DEBUG "RTAS: "
- 
+
 #define RTAS_ERROR_LOG_MAX 2048

 /*

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b5b340a91157..c49f078382a9 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -670,7 +670,7 @@ static void call_rtas_display_status_delay(char c)
 	static int pending_newline = 0;  /* did last write end with unprinted newline? */
 	static int width = 16;

-	if (c == '\n') {	
+	if (c == '\n') {
 		while (width-- > 0)
 			call_rtas_display_status(' ');
 		width = 16;
@@ -680,7 +680,7 @@ static void call_rtas_display_status_delay(char c)
 		if (pending_newline) {
 			call_rtas_display_status('\r');
 			call_rtas_display_status('\n');
-		} 
+		}
 		pending_newline = 0;
 		if (width--) {
 			call_rtas_display_status(c);
@@ -820,7 +820,7 @@ void rtas_progress(char *s, unsigned short hex)
 		else
 			rtas_call(display_character, 1, 1, NULL, '\r');
 	}
- 
+
 	if (row_width)
 		width = row_width[current_line];
 	else
@@ -840,9 +840,9 @@ void rtas_progress(char *s, unsigned short hex)
 			spin_unlock(&progress_lock);
 			return;
 		}
- 
+
 		/* RTAS wants CR-LF, not just LF */
- 
+
 		if (*os == '\n') {
 			rtas_call(display_character, 1, 1, NULL, '\r');
 			rtas_call(display_character, 1, 1, NULL, '\n');
@@ -852,7 +852,7 @@ void rtas_progress(char *s, unsigned short hex)
 		 */
 			rtas_call(display_character, 1, 1, NULL, *os);
 		}
- 
+
 		if (row_width)
 			width = row_width[current_line];
 		else
@@ -861,15 +861,15 @@ void rtas_progress(char *s, unsigned short hex)
 			width--;
 			rtas_call(display_character, 1, 1, NULL, *os);
 		}
- 
+
 		os++;
- 
+
 		/* if we overwrite the screen length */
 		if (width <= 0)
 			while ((*os != 0) && (*os != '\n') && (*os != '\r'))
 				os++;
 	}
- 
+
 	spin_unlock(&progress_lock);
 }
 EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */
--
cgit v1.2.3

From 646477fc47905157a8440cdc45aad22901b5b3ce Mon Sep 17 00:00:00 2001
From: Nathan Lynch
Date: Mon, 6 Nov 2023 07:42:59 -0600
Subject: powerpc/rtas: Remove 'extern' from function declarations in rtas.h

This header occasionally gains new function declarations without the
leading extern in accordance with current style rules. Leaving the
legacy externs in place is making the header more difficult to read
over time because of the inconsistency. Remove them.
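
[ed: aside, not part of the patch. For function declarations the
'extern' keyword is implicit, so the two forms are equivalent:]

	extern int rtas_token(const char *service);	/* legacy style */
	int rtas_token(const char *service);		/* same meaning, current style */

[ed: note this applies only to functions; object declarations such as
'extern time64_t last_rtas_event;' still need the keyword, which is why
the diff below keeps those.]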
Signed-off-by: Nathan Lynch
[mpe: Add names to rtas_call() parameters]
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231106-rtas-trivial-v1-7-61847655c51f@linux.ibm.com
---
 arch/powerpc/include/asm/rtas.h | 53 ++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 1bed6be8ada3..a7110ed52e25 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -408,42 +408,41 @@ static inline bool rtas_function_implemented(const rtas_fn_handle_t handle)
 {
 	return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
-extern int rtas_token(const char *service);
-extern int rtas_call(int token, int, int, int *, ...);
+int rtas_token(const char *service);
+int rtas_call(int token, int nargs, int nret, int *outputs, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
 			int nret, ...);
-extern void __noreturn rtas_restart(char *cmd);
-extern void rtas_power_off(void);
-extern void __noreturn rtas_halt(void);
-extern void rtas_os_term(char *str);
+void __noreturn rtas_restart(char *cmd);
+void rtas_power_off(void);
+void __noreturn rtas_halt(void);
+void rtas_os_term(char *str);
 void rtas_activate_firmware(void);
-extern int rtas_get_sensor(int sensor, int index, int *state);
-extern int rtas_get_sensor_fast(int sensor, int index, int *state);
-extern int rtas_get_power_level(int powerdomain, int *level);
-extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
-extern bool rtas_indicator_present(int token, int *maxindex);
-extern int rtas_set_indicator(int indicator, int index, int new_value);
-extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
-extern void rtas_progress(char *s, unsigned short hex);
+int rtas_get_sensor(int sensor, int index, int *state);
+int rtas_get_sensor_fast(int sensor, int index, int *state);
+int rtas_get_power_level(int powerdomain, int *level);
+int rtas_set_power_level(int powerdomain, int level, int *setlevel);
+bool rtas_indicator_present(int token, int *maxindex);
+int rtas_set_indicator(int indicator, int index, int new_value);
+int rtas_set_indicator_fast(int indicator, int index, int new_value);
+void rtas_progress(char *s, unsigned short hex);
 int rtas_ibm_suspend_me(int *fw_status);
 int rtas_error_rc(int rtas_rc);

 struct rtc_time;
-extern time64_t rtas_get_boot_time(void);
-extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
-extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
+time64_t rtas_get_boot_time(void);
+void rtas_get_rtc_time(struct rtc_time *rtc_time);
+int rtas_set_rtc_time(struct rtc_time *rtc_time);

-extern unsigned int rtas_busy_delay_time(int status);
+unsigned int rtas_busy_delay_time(int status);
 bool rtas_busy_delay(int status);

-extern int early_init_dt_scan_rtas(unsigned long node,
-				   const char *uname, int depth, void *data);
+int early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data);

-extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal);

 #ifdef CONFIG_PPC_PSERIES
 extern time64_t last_rtas_event;
-extern int clobbering_unread_rtas_event(void);
+int clobbering_unread_rtas_event(void);
 int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
 #else
 static inline int clobbering_unread_rtas_event(void) { return 0; }
@@ -454,7 +453,7 @@ static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
 #endif

 #ifdef CONFIG_PPC_RTAS_DAEMON
-extern void rtas_cancel_event_scan(void);
+void rtas_cancel_event_scan(void);
 #else
 static inline void rtas_cancel_event_scan(void) { }
 #endif
@@ -479,7 +478,7 @@ static inline void rtas_cancel_event_scan(void) { }
  * for all rtas calls that require an error buffer argument.
  * This includes 'check-exception' and 'rtas-last-error'.
  */
-extern int rtas_get_error_log_max(void);
+int rtas_get_error_log_max(void);

 /* Event Scan Parameters */
 #define EVENT_SCAN_ALL_EVENTS	0xf0000000
@@ -518,8 +517,8 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg)
 			(devfn << 8) | (reg & 0xff);
 }

-extern void rtas_give_timebase(void);
-extern void rtas_take_timebase(void);
+void rtas_give_timebase(void);
+void rtas_take_timebase(void);

 #ifdef CONFIG_PPC_RTAS
 static inline int page_is_rtas_user_buf(unsigned long pfn)
@@ -532,7 +531,7 @@ static inline int page_is_rtas_user_buf(unsigned long pfn)

 /* Not the best place to put pSeries_coalesce_init, will be fixed when we
  * move some of the rtas suspend-me stuff to pseries */
-extern void pSeries_coalesce_init(void);
+void pSeries_coalesce_init(void);
 void rtas_initialize(void);
 #else
 static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;}
--
cgit v1.2.3

From 183bc0c640c785a710885a10b614193f114fe760 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Tue, 24 Oct 2023 22:27:25 +1100
Subject: powerpc/configs/64s: Enable CONFIG_MEM_SOFT_DIRTY

Enable CONFIG_MEM_SOFT_DIRTY to get some test coverage. Distros enable
it, and it has been broken previously. See commit 66b2ca086210
("powerpc/64s/radix: Fix soft dirty tracking").

Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231024112726.1819795-1-mpe@ellerman.id.au
---
 arch/powerpc/configs/ppc64_defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 6e7b9e8fd225..544a65fda77b 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -92,6 +92,7 @@ CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
 CONFIG_ZONE_DEVICE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
--
cgit v1.2.3

From 98eb30fe4c69a9b602f29e406317c49b5580352a Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Wed, 25 Oct 2023 12:24:52 +1100
Subject: powerpc: Make cpu_spec __ro_after_init

The cpu_spec is a struct holding various information about the CPU the
kernel is executing on. It's populated early in boot and must not
change after that.

In particular the cpu_features and mmu_features hold the set of
discovered CPU/MMU features and are used to set static keys for each
feature, and do binary patching of assembly. So any change to the
cpu_features/mmu_features later in boot will not be reflected in the
state of the static keys or patched code.

There is already logic to check that cpu_features/mmu_features don't
change, see check_features() in feature-fixups.c. But as another layer
of protection the entire cpu_spec should be read only after init,
annotate it as such.
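
[ed: illustration only, not part of the patch. A minimal sketch of what
the annotation means: the object is writable while __init code runs,
and the kernel write-protects it once boot completes, so any later
store faults. Variable and function names here are hypothetical:]

	static int boot_tunable __ro_after_init;

	static int __init tunable_setup(void)
	{
		boot_tunable = 42;	/* OK: init code may still write it */
		return 0;
	}
	early_initcall(tunable_setup);

	/* After init, a write such as 'boot_tunable = 0;' would fault. */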
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231025012452.1985680-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/cputable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e97a0fd0ae90..6f6801da9dc1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -20,9 +20,9 @@
 #include
 #include

-static struct cpu_spec the_cpu_spec __read_mostly;
+static struct cpu_spec the_cpu_spec __ro_after_init;

-struct cpu_spec* cur_cpu_spec __read_mostly = NULL;
+struct cpu_spec *cur_cpu_spec __ro_after_init = NULL;
 EXPORT_SYMBOL(cur_cpu_spec);

 /* The platform string corresponding to the real PVR */
--
cgit v1.2.3

From 6f2a9e0e0ae5fb0697dd1660ede7e609be25ff6f Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Mon, 13 Nov 2023 15:39:47 +1100
Subject: powerpc: Remove orphaned reg_a2.h

Commit fb5a515704d7 ("powerpc: Remove platforms/wsp and associated
pieces") removed the A2 CPU support, but missed removal of reg_a2.h.
None of the defines contained in it are used, with the exception of the
SPRN_TEN* values, but they are also defined in reg_booke.h.

Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231113043947.1931831-1-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/reg_a2.h    | 154 -----------------------------------
 arch/powerpc/kernel/exceptions-64e.S |   1 -
 arch/powerpc/kernel/udbg_16550.c     |   1 -
 3 files changed, 156 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/reg_a2.h

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
deleted file mode 100644
index 74fba29e9491..000000000000
--- a/arch/powerpc/include/asm/reg_a2.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Register definitions specific to the A2 core
- *
- * Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- */
-
-#ifndef __ASM_POWERPC_REG_A2_H__
-#define __ASM_POWERPC_REG_A2_H__
-
-#include
-
-#define SPRN_TENSR	0x1b5
-#define SPRN_TENS	0x1b6	/* Thread ENable Set */
-#define SPRN_TENC	0x1b7	/* Thread ENable Clear */
-
-#define SPRN_A2_CCR0	0x3f0	/* Core Configuration Register 0 */
-#define SPRN_A2_CCR1	0x3f1	/* Core Configuration Register 1 */
-#define SPRN_A2_CCR2	0x3f2	/* Core Configuration Register 2 */
-#define SPRN_MMUCR0	0x3fc	/* MMU Control Register 0 */
-#define SPRN_MMUCR1	0x3fd	/* MMU Control Register 1 */
-#define SPRN_MMUCR2	0x3fe	/* MMU Control Register 2 */
-#define SPRN_MMUCR3	0x3ff	/* MMU Control Register 3 */
-
-#define SPRN_IAR	0x372
-
-#define SPRN_IUCR0	0x3f3
-#define IUCR0_ICBI_ACK	0x1000
-
-#define SPRN_XUCR0	0x3f6	/* Execution Unit Config Register 0 */
-
-#define A2_IERAT_SIZE	16
-#define A2_DERAT_SIZE	32
-
-/* A2 MMUCR0 bits */
-#define MMUCR0_ECL	0x80000000	/* Extended Class for TLB fills */
-#define MMUCR0_TID_NZ	0x40000000	/* TID is non-zero */
-#define MMUCR0_TS	0x10000000	/* Translation space for TLB fills */
-#define MMUCR0_TGS	0x20000000	/* Guest space for TLB fills */
-#define MMUCR0_TLBSEL	0x0c000000	/* TLB or ERAT target for TLB fills */
-#define MMUCR0_TLBSEL_U	0x00000000	/* TLBSEL = UTLB */
-#define MMUCR0_TLBSEL_I	0x08000000	/* TLBSEL = I-ERAT */
-#define MMUCR0_TLBSEL_D	0x0c000000	/* TLBSEL = D-ERAT */
-#define MMUCR0_LOCKSRSH	0x02000000	/* Use TLB lock on tlbsx. */
-#define MMUCR0_TID_MASK	0x000000ff	/* TID field */
-
-/* A2 MMUCR1 bits */
-#define MMUCR1_IRRE		0x80000000	/* I-ERAT round robin enable */
-#define MMUCR1_DRRE		0x40000000	/* D-ERAT round robin enable */
-#define MMUCR1_REE		0x20000000	/* Reference Exception Enable*/
-#define MMUCR1_CEE		0x10000000	/* Change exception enable */
-#define MMUCR1_CSINV_ALL	0x00000000	/* Inval ERAT on all CS evts */
-#define MMUCR1_CSINV_NISYNC	0x04000000	/* Inval ERAT on all ex isync*/
-#define MMUCR1_CSINV_NEVER	0x0c000000	/* Don't inval ERAT on CS */
-#define MMUCR1_ICTID		0x00080000	/* IERAT class field as TID */
-#define MMUCR1_ITTID		0x00040000	/* IERAT thdid field as TID */
-#define MMUCR1_DCTID		0x00020000	/* DERAT class field as TID */
-#define MMUCR1_DTTID		0x00010000	/* DERAT thdid field as TID */
-#define MMUCR1_DCCD		0x00008000	/* DERAT class ignore */
-#define MMUCR1_TLBWE_BINV	0x00004000	/* back invalidate on tlbwe */
-
-/* A2 MMUCR2 bits */
-#define MMUCR2_PSSEL_SHIFT	4
-
-/* A2 MMUCR3 bits */
-#define MMUCR3_THID		0x0000000f	/* Thread ID */
-
-/* *** ERAT TLB bits definitions */
-#define TLB0_EPN_MASK		ASM_CONST(0xfffffffffffff000)
-#define TLB0_CLASS_MASK		ASM_CONST(0x0000000000000c00)
-#define TLB0_CLASS_00		ASM_CONST(0x0000000000000000)
-#define TLB0_CLASS_01		ASM_CONST(0x0000000000000400)
-#define TLB0_CLASS_10		ASM_CONST(0x0000000000000800)
-#define TLB0_CLASS_11		ASM_CONST(0x0000000000000c00)
-#define TLB0_V			ASM_CONST(0x0000000000000200)
-#define TLB0_X			ASM_CONST(0x0000000000000100)
-#define TLB0_SIZE_MASK		ASM_CONST(0x00000000000000f0)
-#define TLB0_SIZE_4K		ASM_CONST(0x0000000000000010)
-#define TLB0_SIZE_64K		ASM_CONST(0x0000000000000030)
-#define TLB0_SIZE_1M		ASM_CONST(0x0000000000000050)
-#define TLB0_SIZE_16M		ASM_CONST(0x0000000000000070)
-#define TLB0_SIZE_1G		ASM_CONST(0x00000000000000a0)
-#define TLB0_THDID_MASK		ASM_CONST(0x000000000000000f)
-#define TLB0_THDID_0		ASM_CONST(0x0000000000000001)
-#define TLB0_THDID_1		ASM_CONST(0x0000000000000002)
-#define TLB0_THDID_2		ASM_CONST(0x0000000000000004)
-#define TLB0_THDID_3		ASM_CONST(0x0000000000000008)
-#define TLB0_THDID_ALL		ASM_CONST(0x000000000000000f)
-
-#define TLB1_RESVATTR		ASM_CONST(0x00f0000000000000)
-#define TLB1_U0			ASM_CONST(0x0008000000000000)
-#define TLB1_U1			ASM_CONST(0x0004000000000000)
-#define TLB1_U2			ASM_CONST(0x0002000000000000)
-#define TLB1_U3			ASM_CONST(0x0001000000000000)
-#define TLB1_R			ASM_CONST(0x0000800000000000)
-#define TLB1_C			ASM_CONST(0x0000400000000000)
-#define TLB1_RPN_MASK		ASM_CONST(0x000003fffffff000)
-#define TLB1_W			ASM_CONST(0x0000000000000800)
-#define TLB1_I			ASM_CONST(0x0000000000000400)
-#define TLB1_M			ASM_CONST(0x0000000000000200)
-#define TLB1_G			ASM_CONST(0x0000000000000100)
-#define TLB1_E			ASM_CONST(0x0000000000000080)
-#define TLB1_VF			ASM_CONST(0x0000000000000040)
-#define TLB1_UX			ASM_CONST(0x0000000000000020)
-#define TLB1_SX			ASM_CONST(0x0000000000000010)
-#define TLB1_UW			ASM_CONST(0x0000000000000008)
-#define TLB1_SW			ASM_CONST(0x0000000000000004)
-#define TLB1_UR			ASM_CONST(0x0000000000000002)
-#define TLB1_SR			ASM_CONST(0x0000000000000001)
-
-/* A2 erativax attributes definitions */
-#define ERATIVAX_RS_IS_ALL		0x000
-#define ERATIVAX_RS_IS_TID		0x040
-#define ERATIVAX_RS_IS_CLASS		0x080
-#define ERATIVAX_RS_IS_FULLMATCH	0x0c0
-#define ERATIVAX_CLASS_00		0x000
-#define ERATIVAX_CLASS_01		0x010
-#define ERATIVAX_CLASS_10		0x020
-#define ERATIVAX_CLASS_11		0x030
-#define ERATIVAX_PSIZE_4K		(TLB_PSIZE_4K >> 1)
-#define ERATIVAX_PSIZE_64K		(TLB_PSIZE_64K >> 1)
-#define ERATIVAX_PSIZE_1M		(TLB_PSIZE_1M >> 1)
-#define ERATIVAX_PSIZE_16M		(TLB_PSIZE_16M >> 1)
-#define ERATIVAX_PSIZE_1G		(TLB_PSIZE_1G >> 1)
-
-/* A2 eratilx attributes definitions */
-#define ERATILX_T_ALL			0
-#define ERATILX_T_TID			1
-#define ERATILX_T_TGS			2
-#define ERATILX_T_FULLMATCH		3
-#define ERATILX_T_CLASS0		4
-#define ERATILX_T_CLASS1		5
-#define ERATILX_T_CLASS2		6
-#define ERATILX_T_CLASS3		7
-
-/* XUCR0 bits */
-#define XUCR0_TRACE_UM_T0		0x40000000	/* Thread 0 */
-#define XUCR0_TRACE_UM_T1		0x20000000	/* Thread 1 */
-#define XUCR0_TRACE_UM_T2		0x10000000	/* Thread 2 */
-#define XUCR0_TRACE_UM_T3		0x08000000	/* Thread 3 */
-
-/* A2 CCR0 register */
-#define A2_CCR0_PME_DISABLED		0x00000000
-#define A2_CCR0_PME_SLEEP		0x40000000
-#define A2_CCR0_PME_RVW			0x80000000
-#define A2_CCR0_PME_DISABLED2		0xc0000000
-
-/* A2 CCR2 register */
-#define A2_CCR2_ERAT_ONLY_MODE		0x00000001
-#define A2_CCR2_ENABLE_ICSWX		0x00000002
-#define A2_CCR2_ENABLE_PC		0x20000000
-#define A2_CCR2_ENABLE_TRACE		0x40000000
-
-#endif /* __ASM_POWERPC_REG_A2_H__ */

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7ab4c8c0f1ab..dcf0591ad3c2 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include <asm/reg_a2.h>
 #include
 #include
 #include

diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 74ddf836f7a2..a0467e528b70 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -7,7 +7,6 @@
 #include
 #include
 #include
-#include <asm/reg_a2.h>
 #include

 extern u8 real_readb(volatile u8 __iomem *addr);
--
cgit v1.2.3

From c8a1634145c23a5a979a7166a12b99871812a6ab Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Mon, 13 Nov 2023 16:19:29 +1100
Subject: powerpc/32: Drop unused grackle_set_stg()

The call to grackle_set_stg() ("Store Gathering") has always been
inside an #if 0, since the code was first merged in v2.3.43pre7.

Apparently it was suspected of causing problems on some hardware so was
disabled. No one has ever proved otherwise so drop the code as unused
for now.

Reported-by: kernel test robot
Reported-by: Bjorn Helgaas
Closes: https://lore.kernel.org/all/20231031145600.GA9161@bhelgaas/
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231113051929.1952351-1-mpe@ellerman.id.au
---
 arch/powerpc/sysdev/grackle.c | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/grackle.c b/arch/powerpc/sysdev/grackle.c
index fd2f94a884f0..7dce8278b71e 100644
--- a/arch/powerpc/sysdev/grackle.c
+++ b/arch/powerpc/sysdev/grackle.c
@@ -18,24 +18,8 @@
 #define GRACKLE_CFA(b, d, o)	(0x80 | ((b) << 8) | ((d) << 16) \
				 | (((o) & ~3) << 24))

-#define GRACKLE_PICR1_STG		0x00000040
 #define GRACKLE_PICR1_LOOPSNOOP		0x00000010

-/* N.B. this is called before bridges is initialized, so we can't
-   use grackle_pcibios_{read,write}_config_dword. */
-static inline void grackle_set_stg(struct pci_controller* bp, int enable)
-{
-	unsigned int val;
-
-	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
-	val = in_le32(bp->cfg_data);
-	val = enable? (val | GRACKLE_PICR1_STG) :
-		(val & ~GRACKLE_PICR1_STG);
-	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
-	out_le32(bp->cfg_data, val);
-	(void)in_le32(bp->cfg_data);
-}
-
 static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable)
 {
 	unsigned int val;
@@ -56,7 +40,4 @@ void __init setup_grackle(struct pci_controller *hose)
 	pci_add_flags(PCI_REASSIGN_ALL_BUS);
 	if (of_machine_is_compatible("AAPL,PowerBook1998"))
 		grackle_set_loop_snoop(hose, 1);
-#if 0 /* Disabled for now, HW problems ??? */
-	grackle_set_stg(hose, 1);
-#endif
 }
--
cgit v1.2.3

From 1b1e38002648819c04773647d5242990e2824264 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Tue, 21 Nov 2023 08:23:32 +0900
Subject: powerpc: add crtsavres.o to always-y instead of extra-y

crtsavres.o is linked to modules. However, as explained in commit
d0e628cd817f ("kbuild: doc: clarify the difference between extra-y and
always-y"), 'make modules' does not build extra-y.

For example, the following command fails:

  $ make ARCH=powerpc LLVM=1 KBUILD_MODPOST_WARN=1 mrproper ps3_defconfig modules
  [snip]
    LD [M]  arch/powerpc/platforms/cell/spufs/spufs.ko
  ld.lld: error: cannot open arch/powerpc/lib/crtsavres.o: No such file or directory
  make[3]: *** [scripts/Makefile.modfinal:56: arch/powerpc/platforms/cell/spufs/spufs.ko] Error 1
  make[2]: *** [Makefile:1844: modules] Error 2
  make[1]: *** [/home/masahiro/workspace/linux-kbuild/Makefile:350: __build_one_by_one] Error 2
  make: *** [Makefile:234: __sub-make] Error 2

Signed-off-by: Masahiro Yamada
Fixes: baa25b571a16 ("powerpc/64: Do not link crtsavres.o in vmlinux")
Reviewed-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231120232332.4100288-1-masahiroy@kernel.org
---
 arch/powerpc/lib/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 51ad0397c17a..6eac63e79a89 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -45,7 +45,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o
 # so it is only needed for modules, and only for older linkers which
 # do not support --save-restore-funcs
 ifndef CONFIG_LD_IS_BFD
-extra-$(CONFIG_PPC64)	+= crtsavres.o
+always-$(CONFIG_PPC64)	+= crtsavres.o
 endif

 obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
--
cgit v1.2.3

From 45b1ba7e5d1f6881050d558baf9bc74a2ae13930 Mon Sep 17 00:00:00 2001
From: Kunwu Chan
Date: Wed, 22 Nov 2023 11:06:51 +0800
Subject: powerpc/xics: Check return value of kasprintf in icp_native_map_one_cpu

kasprintf() returns a pointer to dynamically allocated memory which can
be NULL upon failure. Ensure the allocation was successful by checking
the pointer validity.
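
[ed: illustration only, not part of the patch. The general pattern:
kasprintf() allocates the formatted string and returns NULL if the
allocation fails, so the result must be checked before use. A
hypothetical snippet:]

	char *rname = kasprintf(GFP_KERNEL, "CPU %d resource", cpu);
	if (!rname)
		return -ENOMEM;	/* bail out rather than pass a NULL name on */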
Signed-off-by: Kunwu Chan
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231122030651.3818-1-chentao@kylinos.cn
---
 arch/powerpc/sysdev/xics/icp-native.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index f6ec6dba92dc..700b67476a7d 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -236,6 +236,8 @@ static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
 	rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
 			  cpu, hw_id);
+	if (!rname)
+		return -ENOMEM;
 	if (!request_mem_region(addr, size, rname)) {
 		pr_warn("icp_native: Could not reserve ICP MMIO for CPU %d, interrupt server #0x%x\n",
 			cpu, hw_id);
--
cgit v1.2.3

From df99da19c6c24ab65052ae1bc0904f99069478d9 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Tue, 21 Nov 2023 10:54:36 +1100
Subject: powerpc/lib: Avoid array bounds warnings in vec ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building with GCC with -Warray-bounds enabled there are several
warnings in sstep.c along the lines of:

  In function ‘do_byte_reverse’,
      inlined from ‘do_vec_load’ at arch/powerpc/lib/sstep.c:691:3,
      inlined from ‘emulate_loadstore’ at arch/powerpc/lib/sstep.c:3439:9:
  arch/powerpc/lib/sstep.c:289:23: error: array subscript 2 is outside array bounds of ‘u8[16]’ {aka ‘unsigned char[16]’} [-Werror=array-bounds=]
    289 |                 up[2] = byterev_8(up[1]);
        |                 ~~~~~~^~~~~~~~~~~~~~~~~~
  arch/powerpc/lib/sstep.c: In function ‘emulate_loadstore’:
  arch/powerpc/lib/sstep.c:681:11: note: at offset 16 into object ‘u’ of size 16
    681 |         } u = {};
        |           ^

do_byte_reverse() supports a size up to 32 bytes, but in these cases
the caller is only passing a 16 byte buffer. In practice there is no
bug, do_vec_load() is only called from the LOAD_VMX case in
emulate_loadstore(). That in turn is only reached when analyse_instr()
recognises VMX ops, and in all cases the size is no greater than 16:

  $ git grep -w LOAD_VMX arch/powerpc/lib/sstep.c
  arch/powerpc/lib/sstep.c:			op->type = MKOP(LOAD_VMX, 0, 1);
  arch/powerpc/lib/sstep.c:			op->type = MKOP(LOAD_VMX, 0, 2);
  arch/powerpc/lib/sstep.c:			op->type = MKOP(LOAD_VMX, 0, 4);
  arch/powerpc/lib/sstep.c:			op->type = MKOP(LOAD_VMX, 0, 16);

Similarly for do_vec_store().

Although the warning is incorrect, the code would be safer if it
clamped the size from the caller to the known size of the buffer. Do
that using min_t().

Reported-by: Bagas Sanjaya
Closes: https://lore.kernel.org/linuxppc-dev/YpbUcPrm61RLIiZF@debian.me/
Reported-by: Jan-Benedict Glaw
Closes: https://lore.kernel.org/linuxppc-dev/20221212215117.aa7255t7qd6yefk4@lug-owl.de/
Reported-by: "Gustavo A. R. Silva"
Closes: https://lore.kernel.org/linuxppc-dev/6a8bf78c-aedb-4d5a-b0aa-82a51a17b884@embeddedor.com/
Reviewed-by: "Gustavo A. R. Silva"
Build-tested-by: "Gustavo A. R. Silva"
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231120235436.1569255-1-mpe@ellerman.id.au
---
 arch/powerpc/lib/sstep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a4ab8625061a..a13f05cfc7db 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -688,7 +688,7 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
 	if (err)
 		return err;
 	if (unlikely(cross_endian))
-		do_byte_reverse(&u.b[ea & 0xf], size);
+		do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
 	preempt_disable();
 	if (regs->msr & MSR_VEC)
 		put_vr(rn, &u.v);
@@ -719,7 +719,7 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
 		u.v = current->thread.vr_state.vr[rn];
 	preempt_enable();
 	if (unlikely(cross_endian))
-		do_byte_reverse(&u.b[ea & 0xf], size);
+		do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
 	return copy_mem_out(&u.b[ea & 0xf], ea, size, regs);
 }
 #endif /* CONFIG_ALTIVEC */
--
cgit v1.2.3

From 8f9abaa6d7de0a70fc68acaedce290c1f96e2e59 Mon Sep 17 00:00:00 2001
From: Naveen N Rao
Date: Thu, 23 Nov 2023 12:47:05 +0530
Subject: powerpc/lib: Validate size for vector operations

Some of the fp/vmx code in sstep.c assume a certain maximum size for
the instructions being emulated. The size of those operations however
is determined separately in analyse_instr().

Add a check to validate the assumption on the maximum size of the
operations, so as to prevent any unintended kernel stack corruption.

Signed-off-by: Naveen N Rao
Reviewed-by: Gustavo A. R. Silva
Build-tested-by: Gustavo A. R. Silva
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231123071705.397625-1-naveen@kernel.org
---
 arch/powerpc/lib/sstep.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a13f05cfc7db..5766180f5380 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -586,6 +586,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea,
 	} u;

 	nb = GETSIZE(op->type);
+	if (nb > sizeof(u))
+		return -EINVAL;
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	rn = op->reg;
@@ -636,6 +638,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea,
 	} u;

 	nb = GETSIZE(op->type);
+	if (nb > sizeof(u))
+		return -EINVAL;
 	if (!address_ok(regs, ea, nb))
 		return -EFAULT;
 	rn = op->reg;
@@ -680,6 +684,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
 		u8 b[sizeof(__vector128)];
 	} u = {};

+	if (size > sizeof(u))
+		return -EINVAL;
+
 	if (!address_ok(regs, ea & ~0xfUL, 16))
 		return -EFAULT;
 	/* align to multiple of size */
@@ -707,6 +714,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
 		u8 b[sizeof(__vector128)];
 	} u;

+	if (size > sizeof(u))
+		return -EINVAL;
+
 	if (!address_ok(regs, ea & ~0xfUL, 16))
 		return -EFAULT;
 	/* align to multiple of size */
--
cgit v1.2.3

From 0d555b57ee660d8a871781c0eebf006e855e918d Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Mon, 27 Nov 2023 13:28:09 +1100
Subject: powerpc: pmd_move_must_withdraw() is only needed for CONFIG_TRANSPARENT_HUGEPAGE

The linux-next build of powerpc64 allnoconfig fails with:

  arch/powerpc/mm/book3s64/pgtable.c:557:5: error: no previous prototype for 'pmd_move_must_withdraw'
    557 | int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
        |     ^~~~~~~~~~~~~~~~~~~~~~

Caused by commit:

  c6345dfa6e3e ("Makefile.extrawarn: turn on missing-prototypes globally")
Fix it by moving the function definition under
CONFIG_TRANSPARENT_HUGEPAGE like the prototype. The function is only
called when CONFIG_TRANSPARENT_HUGEPAGE=y.

Signed-off-by: Stephen Rothwell
[mpe: Flesh out change log from linux-next patch]
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231127132809.45c2b398@canb.auug.org.au
---
 arch/powerpc/mm/book3s64/pgtable.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index be229290a6a7..3438ab72c346 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -542,6 +542,7 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
 	set_pte_at(vma->vm_mm, addr, ptep, pte);
 }

+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * For hash translation mode, we use the deposited table to store hash slot
  * information and they are stored at PTRS_PER_PMD offset from related pmd
@@ -563,6 +564,7 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,

 	return true;
 }
+#endif

 /*
  * Does the CPU support tlbie?
--
cgit v1.2.3

From 9be4feb768b86c25da336a6c0f3e3caefd16f1e4 Mon Sep 17 00:00:00 2001
From: Nathan Lynch
Date: Mon, 27 Nov 2023 18:40:09 -0600
Subject: powerpc/rtas_pci: rename and properly expose config access APIs

The rtas_read_config() and rtas_write_config() functions in
kernel/rtas_pci.c have external linkage and two users in arch/powerpc:
the rtas_pci code itself and the pseries platform's "enhanced error
handling" (EEH) support code.

The prototypes for these functions in asm/ppc-pci.h have until now been
guarded by CONFIG_EEH since the only external caller is the pseries EEH
code. However, this presumably has always generated warnings when built
with !CONFIG_EEH and -Wmissing-prototypes:

  arch/powerpc/kernel/rtas_pci.c:46:5: error: no previous prototype for function 'rtas_read_config' [-Werror,-Wmissing-prototypes]
    46 | int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
  arch/powerpc/kernel/rtas_pci.c:98:5: error: no previous prototype for function 'rtas_write_config' [-Werror,-Wmissing-prototypes]
    98 | int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)

The introduction of commit c6345dfa6e3e ("Makefile.extrawarn: turn on
missing-prototypes globally") forces the issue.

The efika and chrp platform code have (static) functions with the same
names but different signatures. We may as well eliminate the potential
for conflicts and confusion by renaming the globally visible versions
as their prototypes get moved out of the CONFIG_EEH-guarded region;
their current names are too generic anyway. Since they operate on
objects of the type 'struct pci_dn *', give them the slightly more
verbose prefix "rtas_pci_dn_" and fix up all the call sites.
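
[ed: illustration only — a standalone reduction of the warning class
this series keeps hitting. With -Wmissing-prototypes, a global function
definition warns unless a prototype was in scope first, so a prototype
hidden behind an #ifdef the definition does not share still triggers it:]

	/* demo.c — compile with: gcc -c -Wmissing-prototypes demo.c */
	#ifdef GUARD			/* stand-in for CONFIG_EEH */
	int guarded_api(int x);		/* prototype visible only when set */
	#endif

	int guarded_api(int x)		/* warns whenever GUARD is unset */
	{
		return x + 1;
	}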
Fixes: c6345dfa6e3e ("Makefile.extrawarn: turn on missing-prototypes globally")
Reported-by: Linux Kernel Functional Testing
Closes: https://lore.kernel.org/linuxppc-dev/CA+G9fYt0LLXtjSz+Hkf3Fhm-kf0ZQanrhUS+zVZGa3O+Wt2+vg@mail.gmail.com/
Signed-off-by: Nathan Lynch
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231127-rtas-pci-rw-config-v1-1-385d29ace3df@linux.ibm.com
---
 arch/powerpc/include/asm/ppc-pci.h           |  5 +++--
 arch/powerpc/kernel/rtas_pci.c               |  8 ++++----
 arch/powerpc/platforms/pseries/eeh_pseries.c | 18 +++++++++---------
 3 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index d9fcff575027..ce2b1b5eebdd 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -35,6 +35,9 @@ extern void init_pci_config_tokens (void);
 extern unsigned long get_phb_buid (struct device_node *);
 extern int rtas_setup_phb(struct pci_controller *phb);

+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val);
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val);
+
 #ifdef CONFIG_EEH

 void eeh_addr_cache_insert_dev(struct pci_dev *dev);
@@ -44,8 +47,6 @@ void eeh_addr_cache_insert_dev(struct pci_dev *dev);
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
 int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
 void eeh_save_bars(struct eeh_dev *edev);
-int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
-int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
 void eeh_pe_state_mark(struct eeh_pe *pe, int state);
 void eeh_pe_mark_isolated(struct eeh_pe *pe);
 void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed);

diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index e1fdc7473b72..fccf96e897f6 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -43,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where)
 	return 0;
 }

-int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 {
 	int returnval = -1;
 	unsigned long buid, addr;
@@ -87,7 +87,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
 	pdn = pci_get_pdn_by_devfn(bus, devfn);

 	/* Validity of pdn is checked in here */
-	ret = rtas_read_config(pdn, where, size, val);
+	ret = rtas_pci_dn_read_config(pdn, where, size, val);
 	if (*val == EEH_IO_ERROR_VALUE(size) &&
 	    eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
 		return PCIBIOS_DEVICE_NOT_FOUND;
@@ -95,7 +95,7 @@ static int rtas_pci_read_config(struct pci_bus *bus,
 	return ret;
 }

-int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val)
 {
 	unsigned long buid, addr;
 	int ret;
@@ -134,7 +134,7 @@ static int rtas_pci_write_config(struct pci_bus *bus,
 	pdn = pci_get_pdn_by_devfn(bus, devfn);

 	/* Validity of pdn is checked in here. */
-	return rtas_write_config(pdn, where, size, val);
+	return rtas_pci_dn_write_config(pdn, where, size, val);
 }

 static struct pci_ops rtas_pci_ops = {

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index def184da51cf..b1ae0c0d1187 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -252,7 +252,7 @@ static int pseries_eeh_cap_start(struct pci_dn *pdn)
 	if (!pdn)
 		return 0;

-	rtas_read_config(pdn, PCI_STATUS, 2, &status);
+	rtas_pci_dn_read_config(pdn, PCI_STATUS, 2, &status);
 	if (!(status & PCI_STATUS_CAP_LIST))
 		return 0;

@@ -270,11 +270,11 @@ static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap)
 		return 0;

 	while (cnt--) {
-		rtas_read_config(pdn, pos, 1, &pos);
+		rtas_pci_dn_read_config(pdn, pos, 1, &pos);
 		if (pos < 0x40)
 			break;
 		pos &= ~3;
-		rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+		rtas_pci_dn_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
 		if (id == 0xff)
 			break;
 		if (id == cap)
@@ -294,7 +294,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
 	if (!edev || !edev->pcie_cap)
 		return 0;

-	if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+	if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
 		return 0;
 	else if (!header)
 		return 0;
@@ -307,7 +307,7 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
 		if (pos < 256)
 			break;

-		if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+		if (rtas_pci_dn_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
 			break;
 	}

@@ -412,8 +412,8 @@ static void pseries_eeh_init_edev(struct pci_dn *pdn)
 	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		edev->mode |= EEH_DEV_BRIDGE;
 		if (edev->pcie_cap) {
-			rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
-					 2, &pcie_flags);
+			rtas_pci_dn_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+						2, &pcie_flags);
 			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
 			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
 				edev->mode |= EEH_DEV_ROOT_PORT;
@@ -676,7 +676,7 @@ static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u3
 {
 	struct pci_dn *pdn = eeh_dev_to_pdn(edev);

-	return rtas_read_config(pdn, where, size, val);
+	return rtas_pci_dn_read_config(pdn, where, size, val);
 }

 /**
@@ -692,7 +692,7 @@ static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u
 {
 	struct pci_dn *pdn = eeh_dev_to_pdn(edev);

-	return rtas_write_config(pdn, where, size, val);
+	return rtas_pci_dn_write_config(pdn, where, size, val);
 }

 #ifdef CONFIG_PCI_IOV
--
cgit v1.2.3

From 360f051d82ee0cc580edfffe9e8c0b93011ab86d Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 30 Nov 2023 00:19:15 +1100
Subject: powerpc/suspend: Add prototype for do_after_copyback()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With HIBERNATION=y the build breaks with:

  arch/powerpc/kernel/swsusp_64.c:14:6: error: no previous prototype for ‘do_after_copyback’ [-Werror=missing-prototypes]
    14 | void do_after_copyback(void)
       |      ^~~~~~~~~~~~~~~~~

do_after_copyback() is only called from asm, so there is no prototype,
nor any header where it makes sense to place one. Just add a prototype
in the C file to fix the build error.
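
[ed: aside, not part of the patch. The idiom used here for functions
with no C callers: declare the prototype immediately before the
definition, so -Wmissing-prototypes is satisfied without inventing a
header nobody else needs. Hypothetical name:]

	void entered_from_asm(void);	/* only referenced from assembly */

	void entered_from_asm(void)
	{
		/* ... */
	}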
Reviewed-by: Arnd Bergmann
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231129131919.2528517-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/swsusp_64.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index 16ee3baaf09a..50fa8fc9ef95 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -11,6 +11,8 @@
 #include
 #include

+void do_after_copyback(void);
+
 void do_after_copyback(void)
 {
 	iommu_restore();
--
cgit v1.2.3

From 24afc61990de29dd47be7642c196a173f6cc21fc Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 30 Nov 2023 00:19:16 +1100
Subject: powerpc/512x: Make pdm360ng_init() static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mpc512x_defconfig build fails with:

  arch/powerpc/platforms/512x/pdm360ng.c:104:13: error: no previous prototype for ‘pdm360ng_init’ [-Werror=missing-prototypes]
    104 | void __init pdm360ng_init(void)
        |             ^~~~~~~~~~~~~

Fix it by making pdm360ng_init() static.

Reviewed-by: Arnd Bergmann
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231129131919.2528517-2-mpe@ellerman.id.au
---
 arch/powerpc/platforms/512x/pdm360ng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index ce51cfeeb066..8bbbf78bb42b 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -101,7 +101,7 @@ static inline void __init pdm360ng_touchscreen_init(void)
 }
 #endif /* CONFIG_TOUCHSCREEN_ADS7846 */

-void __init pdm360ng_init(void)
+static void __init pdm360ng_init(void)
 {
 	mpc512x_init();
 	pdm360ng_touchscreen_init();
--
cgit v1.2.3

From 10feb8f9612239b665815807e950bcd999a75dd2 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 30 Nov 2023 00:19:17 +1100
Subject: powerpc/512x: Fix missing prototype warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mpc512x_defconfig build fails with:

  arch/powerpc/platforms/512x/mpc5121_ads_cpld.c:142:1: error: no previous prototype for ‘mpc5121_ads_cpld_map’ [-Werror=missing-prototypes]
    142 | mpc5121_ads_cpld_map(void)
        | ^~~~~~~~~~~~~~~~~~~~
  arch/powerpc/platforms/512x/mpc5121_ads_cpld.c:157:1: error: no previous prototype for ‘mpc5121_ads_cpld_pic_init’ [-Werror=missing-prototypes]
    157 | mpc5121_ads_cpld_pic_init(void)
        | ^~~~~~~~~~~~~~~~~~~~~~~~~

There are prototypes for these functions but the header they are in is
not included by mpc5121_ads_cpld.c. Include it to fix the build error.
Reviewed-by: Arnd Bergmann
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231129131919.2528517-3-mpe@ellerman.id.au
---
 arch/powerpc/platforms/512x/mpc5121_ads_cpld.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index 6f08d07aee3b..e995eb30bf09 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -17,6 +17,8 @@
 #include
 #include

+#include "mpc5121_ads.h"
+
 static struct device_node *cpld_pic_node;
 static struct irq_domain *cpld_pic_host;
--
cgit v1.2.3

From b90ad501715f2feb1b0bf97aa700adb39c78deb3 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 30 Nov 2023 00:19:18 +1100
Subject: powerpc/44x: Make ppc44x_idle_init() static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 44x/fsp2_defconfig build fails with:

  arch/powerpc/platforms/44x/idle.c:30:12: error: no previous prototype for ‘ppc44x_idle_init’ [-Werror=missing-prototypes]
    30 | int __init ppc44x_idle_init(void)
       |            ^~~~~~~~~~~~~~~~

Fix it by making ppc44x_idle_init() static.

Reviewed-by: Arnd Bergmann
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231129131919.2528517-4-mpe@ellerman.id.au
---
 arch/powerpc/platforms/44x/idle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c
index f533b495e7db..e2eeef8dff78 100644
--- a/arch/powerpc/platforms/44x/idle.c
+++ b/arch/powerpc/platforms/44x/idle.c
@@ -27,7 +27,7 @@ static void ppc44x_idle(void)
 	isync();
 }

-int __init ppc44x_idle_init(void)
+static int __init ppc44x_idle_init(void)
 {
 	if (!mode_spin) {
 		/* If we are not setting spin mode
--
cgit v1.2.3

From ede66cd22441820cbd399936bf84fdc4294bc7fa Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 30 Nov 2023 00:19:19 +1100
Subject: powerpc/64s: Fix CONFIG_NUMA=n build due to create_section_mapping()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With CONFIG_NUMA=n the build fails with:

  arch/powerpc/mm/book3s64/pgtable.c:275:15: error: no previous prototype for ‘create_section_mapping’ [-Werror=missing-prototypes]
    275 | int __meminit create_section_mapping(unsigned long start, unsigned long end,
        |               ^~~~~~~~~~~~~~~~~~~~~~

That happens because the prototype for create_section_mapping() is in
asm/mmzone.h, but asm/mmzone.h is only included by linux/mmzone.h when
CONFIG_NUMA=y.

In fact the prototype is only needed by arch/powerpc/mm code, so move
the prototype into arch/powerpc/mm/mmu_decl.h, which also fixes the
build error.
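
[ed: sketch only, simplified — why the prototype was invisible with
NUMA=n: the header carrying it is itself only included conditionally:]

	/* include/linux/mmzone.h (simplified) */
	#ifdef CONFIG_NUMA
	#include <asm/mmzone.h>		/* declares create_section_mapping() */
	#endif

	/* so with CONFIG_NUMA=n the definition in pgtable.c has no
	   prototype in scope, and -Wmissing-prototypes fires */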
Signed-off-by: Michael Ellerman
Link: https://msgid.link/20231129131919.2528517-5-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/mmzone.h | 5 -----
 arch/powerpc/mm/mmu_decl.h        | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 4c6c6dbd182f..4740ca230d36 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -46,10 +46,5 @@ u64 memory_hotplug_max(void);
 #define __HAVE_ARCH_RESERVED_KERNEL_PAGES
 #endif

-#ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end,
-				  int nid, pgprot_t prot);
-#endif
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_MMZONE_H_ */

diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 7f9ff0640124..72341b9fb552 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -181,3 +181,8 @@ static inline bool debug_pagealloc_enabled_or_kfence(void)
 {
 	return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled();
 }
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int create_section_mapping(unsigned long start, unsigned long end,
+			   int nid, pgprot_t prot);
+#endif
--
cgit v1.2.3

From d8c3f243d4db24675b653f0568bb65dae34e6455 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Thu, 30 Nov 2023 22:44:32 +1100
Subject: powerpc/mm: Fix build failures due to arch_reserved_kernel_pages()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With NUMA=n and FA_DUMP=y or PRESERVE_FA_DUMP=y the build fails with:

  arch/powerpc/kernel/fadump.c:1739:22: error: no previous prototype for ‘arch_reserved_kernel_pages’ [-Werror=missing-prototypes]
    1739 | unsigned long __init arch_reserved_kernel_pages(void)
         |                      ^~~~~~~~~~~~~~~~~~~~~~~~~~

The prototype for arch_reserved_kernel_pages() is in include/linux/mm.h,
but it's guarded by __HAVE_ARCH_RESERVED_KERNEL_PAGES. The powerpc
headers define __HAVE_ARCH_RESERVED_KERNEL_PAGES in asm/mmzone.h, which
is not included into the generic headers when NUMA=n.

Move the definition of __HAVE_ARCH_RESERVED_KERNEL_PAGES into asm/mmu.h
which is included regardless of NUMA=n.

Additionally the ifdef around __HAVE_ARCH_RESERVED_KERNEL_PAGES needs
to also check for CONFIG_PRESERVE_FA_DUMP.
Signed-off-by: Michael Ellerman Link: https://msgid.link/20231130114433.3053544-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/mmu.h | 4 ++++ arch/powerpc/include/asm/mmzone.h | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 52cc25864a1b..d8b7e246a32f 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -412,5 +412,9 @@ extern void *abatron_pteptrs[2]; #include #endif +#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) +#define __HAVE_ARCH_RESERVED_KERNEL_PAGES +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_MMU_H_ */ diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 4740ca230d36..da827d2d0866 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -42,9 +42,6 @@ u64 memory_hotplug_max(void); #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif /* CONFIG_NUMA */ -#ifdef CONFIG_FA_DUMP -#define __HAVE_ARCH_RESERVED_KERNEL_PAGES -#endif #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ -- cgit v1.2.3 From f8d3555355653848082c351fa90775214fb8a4fa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 30 Nov 2023 22:44:33 +1100 Subject: powerpc: Fix build error due to is_valid_bugaddr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CONFIG_GENERIC_BUG=n the build fails with: arch/powerpc/kernel/traps.c:1442:5: error: no previous prototype for ‘is_valid_bugaddr’ [-Werror=missing-prototypes] 1442 | int is_valid_bugaddr(unsigned long addr) | ^~~~~~~~~~~~~~~~ The prototype is only defined, and the function is only needed, when CONFIG_GENERIC_BUG=y, so move the implementation under that. Signed-off-by: Michael Ellerman Link: https://msgid.link/20231130114433.3053544-2-mpe@ellerman.id.au --- arch/powerpc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5ea2014aff90..11e062b47d3f 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1439,10 +1439,12 @@ static int emulate_instruction(struct pt_regs *regs) return -EINVAL; } +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { return is_kernel_addr(addr); } +#endif #ifdef CONFIG_MATH_EMULATION static int emulate_math(struct pt_regs *regs) -- cgit v1.2.3 From e12d8e2602d2bcd26022eff3e2519d25925e760c Mon Sep 17 00:00:00 2001 From: Zhao Ke Date: Wed, 29 Nov 2023 15:58:45 +0800 Subject: powerpc: Add PVN support for HeXin C2000 processor HeXin Tech Co. has applied for a new PVN from the OpenPower Community for its new processor C2000. The OpenPower Community has assigned a new PVN, 0x0066. Add PVR register support for this PVN. 
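For background, a cpu_specs entry is selected by masking the PVR and comparing the result with pvr_value. A standalone sketch (the PVR used below is hypothetical; PVR_VER() mirrors asm/reg.h):

#include <stdint.h>
#include <stdio.h>

#define PVR_VER(pvr)	(((pvr) >> 16) & 0xFFFFu)

struct spec {
	uint32_t pvr_mask;
	uint32_t pvr_value;
	const char *cpu_name;
};

static const struct spec specs[] = {
	/* 0xffff0000 masks off the revision, keeping the version. */
	{ 0xffff0000u, 0x00660000u, "HX-C2000" },
	{ 0xffff0000u, 0x004d0000u, "POWER8" },
};

int main(void)
{
	uint32_t pvr = 0x00660100u;	/* hypothetical C2000 rev 1.0 */

	for (unsigned int i = 0; i < sizeof(specs) / sizeof(specs[0]); i++) {
		if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value) {
			printf("cpu: %s, version 0x%04x\n",
			       specs[i].cpu_name, (unsigned int)PVR_VER(pvr));
			break;
		}
	}
	return 0;
}

The 0xffff0000 mask is why one table entry covers every revision of the part, while the KVM, pkeys, subcore and cxl changes below compare PVR_VER() values directly.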
Signed-off-by: Zhao Ke Link: https://discuss.openpower.foundation/t/how-to-get-a-new-pvr-for-processors-follow-power-isa/477/10 Signed-off-by: Michael Ellerman Link: https://msgid.link/20231129075845.57976-1-ke.zhao@shingroup.cn --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kernel/cpu_specs_book3s_64.h | 15 +++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/mm/book3s64/pkeys.c | 3 ++- arch/powerpc/platforms/powernv/subcore.c | 3 ++- drivers/misc/cxl/cxl.h | 3 ++- 6 files changed, 23 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 4ae4ab9090a2..7fd09f25452d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1361,6 +1361,7 @@ #define PVR_POWER8E 0x004B #define PVR_POWER8NVL 0x004C #define PVR_POWER8 0x004D +#define PVR_HX_C2000 0x0066 #define PVR_POWER9 0x004E #define PVR_POWER10 0x0080 #define PVR_BE 0x0070 diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h index c370c1b804a9..3ff9757df4c0 100644 --- a/arch/powerpc/kernel/cpu_specs_book3s_64.h +++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h @@ -238,6 +238,21 @@ static struct cpu_spec cpu_specs[] __initdata = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* 2.07-compliant processor, HeXin C2000 processor */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00660000, + .cpu_name = "HX-C2000", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* 3.00-compliant processor, i.e. 
Power9 "architected" mode */ .pvr_mask = 0xffffffff, .pvr_value = 0x0f000005, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9118242063fb..5b92619a05fd 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -604,6 +604,7 @@ static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: + case PVR_HX_C2000: case PVR_POWER9: vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | BOOK3S_HFLAG_NEW_TLBIE; diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 125733962033..a974baf8f327 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -89,7 +89,8 @@ static int __init scan_pkey_feature(void) unsigned long pvr = mfspr(SPRN_PVR); if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || - PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9) + PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9 || + PVR_VER(pvr) == PVR_HX_C2000) pkeys_total = 32; } } diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 191424468f10..393e747541fb 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -425,7 +425,8 @@ static int subcore_init(void) if (pvr_ver != PVR_POWER8 && pvr_ver != PVR_POWER8E && - pvr_ver != PVR_POWER8NVL) + pvr_ver != PVR_POWER8NVL && + pvr_ver != PVR_HX_C2000) return 0; /* diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 0562071cdd4a..6ad0ab892675 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -836,7 +836,8 @@ static inline bool cxl_is_power8(void) { if ((pvr_version_is(PVR_POWER8E)) || (pvr_version_is(PVR_POWER8NVL)) || - (pvr_version_is(PVR_POWER8))) + (pvr_version_is(PVR_POWER8)) || + (pvr_version_is(PVR_HX_C2000))) return true; return false; } -- cgit v1.2.3 From a9e1e4d6e8c77c732e8084b03bae0c78cafdceb0 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 24 Nov 2023 11:02:37 +0100 Subject: powerpc/85xx: Fix typo in code comment s/singals/signals/ Signed-off-by: Dario Binacchi Signed-off-by: Michael Ellerman Link: https://msgid.link/20231124100241.660374-1-dario.binacchi@amarulasolutions.com --- arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index ec9f60fbebc7..e0cec670d8db 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -76,7 +76,7 @@ static void __init mpc85xx_rdb_setup_arch(void) /* P1025 has pins muxed for QE and other functions. To * enable QE UEC mode, we need to set bit QE0 for UCC1 * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9 - * and QE12 for QE MII management singals in PMUXCR + * and QE12 for QE MII management signals in PMUXCR * register. */ setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | -- cgit v1.2.3 From 4a74197b65e69c46fe6e53f7df2f4d6ce9ffe012 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 30 Nov 2023 21:51:59 -0800 Subject: powerpc/44x: select I2C for CURRITUCK Fix build errors when CURRITUCK=y and I2C is not builtin (=m or is not set). 
Fixes these build errors: powerpc-linux-ld: arch/powerpc/platforms/44x/ppc476.o: in function `avr_halt_system': ppc476.c:(.text+0x58): undefined reference to `i2c_smbus_write_byte_data' powerpc-linux-ld: arch/powerpc/platforms/44x/ppc476.o: in function `ppc47x_device_probe': ppc476.c:(.init.text+0x18): undefined reference to `i2c_register_driver' Fixes: 2a2c74b2efcb ("IBM Akebono: Add the Akebono platform") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Closes: lore.kernel.org/r/202312010820.cmdwF5X9-lkp@intel.com Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201055159.8371-1-rdunlap@infradead.org --- arch/powerpc/platforms/44x/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 1624ebf95497..35a1f4b9f827 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -173,6 +173,7 @@ config ISS4xx config CURRITUCK bool "IBM Currituck (476fpe) Support" depends on PPC_47x + select I2C select SWIOTLB select 476FPE select FORCE_PCI -- cgit v1.2.3 From bd68ffce69f6cf8ddd3a3c32549d1d2275e49fc5 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 14 Nov 2023 11:01:53 -0600 Subject: powerpc/pseries/memhp: Fix access beyond end of drmem array dlpar_memory_remove_by_index() may access beyond the bounds of the drmem lmb array when the LMB lookup fails to match an entry with the given DRC index. When the search fails, the cursor is left pointing to &drmem_info->lmbs[drmem_info->n_lmbs], which is one element past the last valid entry in the array. The debug message at the end of the function then dereferences this pointer: pr_debug("Failed to hot-remove memory at %llx\n", lmb->base_addr); This was found by inspection and confirmed with KASAN: pseries-hotplug-mem: Attempting to hot-remove LMB, drc index 1234 ================================================================== BUG: KASAN: slab-out-of-bounds in dlpar_memory+0x298/0x1658 Read of size 8 at addr c000000364e97fd0 by task bash/949 dump_stack_lvl+0xa4/0xfc (unreliable) print_report+0x214/0x63c kasan_report+0x140/0x2e0 __asan_load8+0xa8/0xe0 dlpar_memory+0x298/0x1658 handle_dlpar_errorlog+0x130/0x1d0 dlpar_store+0x18c/0x3e0 kobj_attr_store+0x68/0xa0 sysfs_kf_write+0xc4/0x110 kernfs_fop_write_iter+0x26c/0x390 vfs_write+0x2d4/0x4e0 ksys_write+0xac/0x1a0 system_call_exception+0x268/0x530 system_call_vectored_common+0x15c/0x2ec Allocated by task 1: kasan_save_stack+0x48/0x80 kasan_set_track+0x34/0x50 kasan_save_alloc_info+0x34/0x50 __kasan_kmalloc+0xd0/0x120 __kmalloc+0x8c/0x320 kmalloc_array.constprop.0+0x48/0x5c drmem_init+0x2a0/0x41c do_one_initcall+0xe0/0x5c0 kernel_init_freeable+0x4ec/0x5a0 kernel_init+0x30/0x1e0 ret_from_kernel_user_thread+0x14/0x1c The buggy address belongs to the object at c000000364e80000 which belongs to the cache kmalloc-128k of size 131072 The buggy address is located 0 bytes to the right of allocated 98256-byte region [c000000364e80000, c000000364e97fd0) ================================================================== pseries-hotplug-mem: Failed to hot-remove memory at 0 Log failed lookups with a separate message and dereference the cursor only when it points to a valid entry. 
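Reduced to a standalone sketch, the bug class looks like this (illustrative types and values, not the drmem code):

#include <stdio.h>

struct lmb {
	unsigned int drc_index;
	unsigned long base_addr;
};

int main(void)
{
	struct lmb lmbs[3] = {
		{ 1, 0x0UL }, { 2, 0x10000000UL }, { 3, 0x20000000UL },
	};
	unsigned int drc_index = 1234;	/* no matching entry */
	struct lmb *lmb;
	int found = 0;

	for (lmb = &lmbs[0]; lmb < &lmbs[3]; lmb++) {
		if (lmb->drc_index == drc_index) {
			found = 1;
			break;
		}
	}

	/* On failure lmb == &lmbs[3], one past the end: computing that
	 * pointer is fine, dereferencing it (as the old debug message
	 * did) is the out-of-bounds read KASAN reported. */
	if (!found)
		printf("Failed to look up LMB for drc index %x\n", drc_index);
	else
		printf("Found LMB at %lx\n", lmb->base_addr);
	return 0;
}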
Signed-off-by: Nathan Lynch Fixes: 51925fb3c5c9 ("powerpc/pseries: Implement memory hotplug remove in the kernel") Signed-off-by: Michael Ellerman Link: https://msgid.link/20231114-pseries-memhp-fixes-v1-1-fb8f2bb7c557@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index a43bfb01720a..6f2eebae7bee 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -436,14 +436,15 @@ static int dlpar_memory_remove_by_index(u32 drc_index) } } - if (!lmb_found) + if (!lmb_found) { + pr_debug("Failed to look up LMB for drc index %x\n", drc_index); rc = -EINVAL; - - if (rc) + } else if (rc) { pr_debug("Failed to hot-remove memory at %llx\n", lmb->base_addr); - else + } else { pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); + } return rc; } -- cgit v1.2.3 From 27951e1d8274e9f9a2925b069e4492939a3f2099 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 14 Nov 2023 11:01:55 -0600 Subject: powerpc/pseries/memhp: Log more error conditions in add path When an add operation for multiple LMBs fails, there is currently little indication from the kernel of what went wrong. Be a little more verbose about error conditions in the add paths. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231114-pseries-memhp-fixes-v1-3-fb8f2bb7c557@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-memory.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 6f2eebae7bee..3fe3ddb30c04 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -208,8 +208,10 @@ static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online) int rc; mem_block = lmb_to_memblock(lmb); - if (!mem_block) + if (!mem_block) { + pr_err("Failed memory block lookup for LMB 0x%x\n", lmb->drc_index); return -EINVAL; + } if (online && mem_block->dev.offline) rc = device_online(&mem_block->dev); @@ -576,6 +578,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = update_lmb_associativity_index(lmb); if (rc) { dlpar_release_drc(lmb->drc_index); + pr_err("Failed to configure LMB 0x%x\n", lmb->drc_index); return rc; } @@ -589,12 +592,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) /* Add the memory */ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY); if (rc) { + pr_err("Failed to add LMB 0x%x to node %u", lmb->drc_index, nid); invalidate_lmb_associativity_index(lmb); return rc; } rc = dlpar_online_lmb(lmb); if (rc) { + pr_err("Failed to online LMB 0x%x on node %u\n", lmb->drc_index, nid); __remove_memory(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { -- cgit v1.2.3 From 7d370e1812b9a5f5cc68aaa5991bf7d31d8ff52c Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:06 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Invalidate RPT before deleting a guest An L0 must invalidate the L2's RPT during H_GUEST_DELETE if this has not already been done. This is a slow operation that means H_GUEST_DELETE must return H_BUSY multiple times before completing. Invalidate the tables before deleting the guest so there is less work for the L0 to do. 
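A hedged toy model of the H_BUSY pattern motivating the change (the constants and helpers are illustrative, not the real PAPR interfaces):

#define H_SUCCESS	0
#define H_BUSY		1

static int slow_work_left = 3;	/* models the RPT teardown in the L0 */

static long hcall_guest_delete(void)
{
	if (slow_work_left > 0) {
		slow_work_left--;
		return H_BUSY;	/* caller must come back later */
	}
	return H_SUCCESS;
}

static void invalidate_rpt(void)
{
	slow_work_left = 0;	/* slow part done explicitly, up front */
}

int main(void)
{
	long rc;

	invalidate_rpt();	/* fewer H_BUSY round trips below */
	do {
		rc = hcall_guest_delete();
	} while (rc == H_BUSY);
	return (int)rc;
}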
Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-2-vaibhav@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/kvm/book3s_hv.c | 6 ++++-- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4f527d09c92b..a37736ed3728 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -302,6 +302,7 @@ void kvmhv_nested_exit(void); void kvmhv_vm_nested_init(struct kvm *kvm); long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu); +void kvmhv_flush_lpid(u64 lpid); void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1ed6ec140701..5543e8490cd9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5691,10 +5691,12 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0); } - if (kvmhv_is_nestedv2()) + if (kvmhv_is_nestedv2()) { + kvmhv_flush_lpid(kvm->arch.lpid); plpar_guest_delete(0, kvm->arch.lpid); - else + } else { kvmppc_free_lpid(kvm->arch.lpid); + } kvmppc_free_pimap(kvm); } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 3b658b8696bc..5c375ec1a3c6 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -503,7 +503,7 @@ void kvmhv_nested_exit(void) } } -static void kvmhv_flush_lpid(u64 lpid) +void kvmhv_flush_lpid(u64 lpid) { long rc; -- cgit v1.2.3 From e0d4acbcba3f2d63dc15bc5432c8e26fc9e19675 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:07 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Avoid reloading the tb offset The kvmppc_get_tb_offset() getter reloads KVMPPC_GSID_TB_OFFSET from the L0 for nestedv2 host. This is unnecessary as the value does not change. KVMPPC_GSID_TB_OFFSET also need not be reloaded in kvmppc_{s,g}et_dec_expires(). 
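A simplified standalone sketch of the accessor split (hypothetical helpers; the counter stands in for H_GUEST_GET_STATE round trips):

#include <assert.h>

static unsigned long long cached_tb_offset;
static int l0_round_trips;

/* The setter still pushes the new value towards the L0. */
static void set_tb_offset(unsigned long long val)
{
	cached_tb_offset = val;
	l0_round_trips++;	/* one (modelled) trip to the L0 */
}

/* The getter now trusts the cache: tb_offset is invariant after
 * setup, so no reload is issued. */
static unsigned long long get_tb_offset(void)
{
	return cached_tb_offset;
}

int main(void)
{
	set_tb_offset(0x1000);
	for (int i = 0; i < 1000; i++)
		assert(get_tb_offset() == 0x1000);
	return l0_round_trips - 1;	/* 0: only the set hit the L0 */
}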
Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-3-vaibhav@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a37736ed3728..3e1e2a698c9e 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -594,13 +594,17 @@ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ KVMPPC_BOOK3S_VCORE_ACCESSOR(vtb, 64, KVMPPC_GSID_VTB) -KVMPPC_BOOK3S_VCORE_ACCESSOR(tb_offset, 64, KVMPPC_GSID_TB_OFFSET) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(arch_compat, 32, KVMPPC_GSID_LOGICAL_PVR) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(lpcr, 64, KVMPPC_GSID_LPCR) +KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(tb_offset, 64, KVMPPC_GSID_TB_OFFSET) + +static inline u64 kvmppc_get_tb_offset(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vcore->tb_offset; +} static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu) { - WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0); WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB) < 0); return vcpu->arch.dec_expires; } @@ -608,7 +612,6 @@ static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu) static inline void kvmppc_set_dec_expires(struct kvm_vcpu *vcpu, u64 val) { vcpu->arch.dec_expires = val; - WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_TB_OFFSET) < 0); kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB); } -- cgit v1.2.3 From 63ccae78cd88b52fb1d598ae33fa8408ce067b30 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:08 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Do not check msr on hcalls The check for a hcall coming from userspace is done for KVM-PR. This is not supported for nestedv2 and the L0 will directly inject the necessary exception to the L2 if userspace performs a hcall. Avoid checking the MSR and thus avoid a H_GUEST_GET_STATE hcall in the L1. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-4-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5543e8490cd9..069c336b6f3c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1688,7 +1688,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, { int i; - if (unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { + if (!kvmhv_is_nestedv2() && unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { /* * Guest userspace executed sc 1. This can only be * reached by the P9 path because the old path @@ -4949,7 +4949,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_PAPR_HCALL) { accumulate_time(vcpu, &vcpu->arch.hcall); - if (WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { + if (!kvmhv_is_nestedv2() && WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) { /* * These should have been caught reflected * into the guest by now. Final sanity check: -- cgit v1.2.3 From e678748a8dca5b57041a84a66577f6168587b3f7 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:09 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Get the PID only if needed to copy tofrom a guest kvmhv_copy_tofrom_guest_radix() gets the PID at the start of the function. 
If pid is not used, then this is a wasteful H_GUEST_GET_STATE hcall for nestedv2 hosts. Move the assignment to where pid will be used. Suggested-by: Nicholas Piggin Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-5-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 175a8eb2681f..916af6c153a5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -97,7 +97,7 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, void *from, unsigned long n) { int lpid = vcpu->kvm->arch.lpid; - int pid = kvmppc_get_pid(vcpu); + int pid; /* This would cause a data segment intr so don't allow the access */ if (eaddr & (0x3FFUL << 52)) @@ -110,6 +110,8 @@ static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, /* If accessing quadrant 3 then pid is expected to be 0 */ if (((eaddr >> 62) & 0x3) == 0x3) pid = 0; + else + pid = kvmppc_get_pid(vcpu); eaddr &= ~(0xFFFUL << 52); -- cgit v1.2.3 From ec0f6639fa8853cf6bfdfc3588aada7eeb7e5e37 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:10 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Ensure LPCR_MER bit is passed to the L0 LPCR_MER is conditionally set during entry to a guest if there is a pending external interrupt. In the nestedv2 case, this change is not being communicated to the L0, which means it is not being set in the L2. Ensure the updated LPCR value is passed to the L0. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-6-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 069c336b6f3c..6d1f0bca27aa 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4084,6 +4084,8 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit, if (rc < 0) return -EINVAL; + kvmppc_gse_put_u64(io->vcpu_run_input, KVMPPC_GSID_LPCR, lpcr); + accumulate_time(vcpu, &vcpu->arch.in_guest); rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id, &trap, &i); -- cgit v1.2.3 From ecd10702baae5c16a91d139bde7eff84ce55daee Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 1 Dec 2023 18:56:11 +0530 Subject: KVM: PPC: Book3S HV: Handle pending exceptions on guest entry with MSR_EE Commit 026728dc5d41 ("KVM: PPC: Book3S HV P9: Inject pending xive interrupts at guest entry") changed guest entry so that if external interrupts are enabled, BOOK3S_IRQPRIO_EXTERNAL is not tested for. Test for this regardless of MSR_EE. For an L1 host, do not inject an interrupt, but always use LPCR_MER. If the L0 desires it can inject an interrupt. 
Fixes: 026728dc5d41 ("KVM: PPC: Book3S HV P9: Inject pending xive interrupts at guest entry") Signed-off-by: Nicholas Piggin [jpn: use kvmpcc_get_msr(), write commit message] Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-7-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6d1f0bca27aa..4dc6a928073f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4738,13 +4738,19 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (!nested) { kvmppc_core_prepare_to_enter(vcpu); - if (__kvmppc_get_msr_hv(vcpu) & MSR_EE) { - if (xive_interrupt_pending(vcpu)) + if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, + &vcpu->arch.pending_exceptions) || + xive_interrupt_pending(vcpu)) { + /* + * For nested HV, don't synthesize but always pass MER, + * the L0 will be able to optimise that more + * effectively than manipulating registers directly. + */ + if (!kvmhv_on_pseries() && (__kvmppc_get_msr_hv(vcpu) & MSR_EE)) kvmppc_inject_interrupt_hv(vcpu, - BOOK3S_INTERRUPT_EXTERNAL, 0); - } else if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, - &vcpu->arch.pending_exceptions)) { - lpcr |= LPCR_MER; + BOOK3S_INTERRUPT_EXTERNAL, 0); + else + lpcr |= LPCR_MER; } } else if (vcpu->arch.pending_exceptions || vcpu->arch.doorbell_request || -- cgit v1.2.3 From df938a5576f3f3b08e1f217c660385c0d58a0b91 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:12 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Do not inject certain interrupts There is no need to inject an external interrupt in kvmppc_book3s_irqprio_deliver() as the test for BOOK3S_IRQPRIO_EXTERNAL in kvmhv_run_single_vcpu() before guest entry will raise LPCR_MER if needed. There is also no need to inject the decrementer interrupt as this will be raised within the L2 if needed. Avoiding these injections reduces H_GUEST_GET_STATE hcalls by the L1. Suggested-by: Nicholas Piggin Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-8-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 6cd20ab9e94e..8acec144120e 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -302,11 +302,11 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: - deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; + deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_DECREMENTER; break; case BOOK3S_IRQPRIO_EXTERNAL: - deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; + deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_EXTERNAL; break; case BOOK3S_IRQPRIO_SYSTEM_RESET: -- cgit v1.2.3 From a9a3de530d7531bf6cd3f6ccda769cd94c1105a0 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:13 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Avoid msr check in kvmppc_handle_exit_hv() The msr check in kvmppc_handle_exit_hv() is not needed for nestedv2 hosts, skip the check to avoid a H_GUEST_GET_STATE hcall. 
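The guard relies on &&'s left-to-right short-circuit evaluation, sketched here with hypothetical helpers:

/* When the cheap test fires, the expensive getter (which would
 * round-trip to the L0 on nestedv2) is never evaluated. */
static int is_nestedv2_host(void)
{
	return 1;	/* pretend we are an L1 on a nestedv2 L0 */
}

static unsigned long expensive_get_msr(void)
{
	return 0;	/* stand-in for a state-reloading accessor */
}

int main(void)
{
	if (!is_nestedv2_host() && (expensive_get_msr() & 0x4000))
		return 1;	/* 0x4000: an illustrative MSR bit */
	return 0;
}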
Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-9-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4dc6a928073f..47fe470375df 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1597,7 +1597,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * That can happen due to a bug, or due to a machine check * occurring at just the wrong time. */ - if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) { + if (!kvmhv_is_nestedv2() && (__kvmppc_get_msr_hv(vcpu) & MSR_HV)) { printk(KERN_EMERG "KVM trap in HV mode!\n"); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", vcpu->arch.trap, kvmppc_get_pc(vcpu), -- cgit v1.2.3 From 4bc8ff6f170c78f64446c5d5f9ef6771eefd3416 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:14 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Do not call H_COPY_TOFROM_GUEST H_COPY_TOFROM_GUEST is part of the nestedv1 API and so should not be called by a nestedv2 host. Do not attempt to call it. Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-10-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 916af6c153a5..4a1abb9f7c05 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -40,6 +40,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, unsigned long quadrant, ret = n; bool is_load = !!to; + if (kvmhv_is_nestedv2()) + return H_UNSUPPORTED; + /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, -- cgit v1.2.3 From db1dcfae1dae3c042f348175ac0394e2fc14b1b3 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:15 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Register the VPA with the L0 In the nestedv2 case, the L1 may register the L2's VPA with the L0. This allows the L0 to manage the L2's dispatch count, as well as enable possible performance optimisations by seeing if certain resources are not being used by the L2 (such as the PMCs). Use the H_GUEST_SET_STATE call to inform the L0 of the L2's VPA address. This can not be done in the H_GUEST_VCPU_RUN input buffer. 
Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-11-vaibhav@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 1 + arch/powerpc/kvm/book3s_hv.c | 38 ++++++++++++++++++++++++-------- arch/powerpc/kvm/book3s_hv_nestedv2.c | 29 ++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2477021bff54..d8729ec81ca0 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -682,6 +682,7 @@ void kvmhv_nestedv2_vcpu_free(struct kvm_vcpu *vcpu, struct kvmhv_nestedv2_io *i int kvmhv_nestedv2_flush_vcpu(struct kvm_vcpu *vcpu, u64 time_limit); int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1); int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu); +int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 47fe470375df..2ee3f2478570 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -650,7 +650,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, return err; } -static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) +static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap, + struct kvmppc_vpa *old_vpap) { struct kvm *kvm = vcpu->kvm; void *va; @@ -690,9 +691,8 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) kvmppc_unpin_guest_page(kvm, va, gpa, false); va = NULL; } - if (vpap->pinned_addr) - kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, - vpap->dirty); + *old_vpap = *vpap; + vpap->gpa = gpa; vpap->pinned_addr = va; vpap->dirty = false; @@ -702,6 +702,9 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; + struct kvmppc_vpa old_vpa = { 0 }; + if (!(vcpu->arch.vpa.update_pending || vcpu->arch.slb_shadow.update_pending || vcpu->arch.dtl.update_pending)) @@ -709,17 +712,34 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); if (vcpu->arch.vpa.update_pending) { - kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); - if (vcpu->arch.vpa.pinned_addr) + kvmppc_update_vpa(vcpu, &vcpu->arch.vpa, &old_vpa); + if (old_vpa.pinned_addr) { + if (kvmhv_is_nestedv2()) + kvmhv_nestedv2_set_vpa(vcpu, ~0ull); + kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa, + old_vpa.dirty); + } + if (vcpu->arch.vpa.pinned_addr) { init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + if (kvmhv_is_nestedv2()) + kvmhv_nestedv2_set_vpa(vcpu, __pa(vcpu->arch.vpa.pinned_addr)); + } } if (vcpu->arch.dtl.update_pending) { - kvmppc_update_vpa(vcpu, &vcpu->arch.dtl); + kvmppc_update_vpa(vcpu, &vcpu->arch.dtl, &old_vpa); + if (old_vpa.pinned_addr) + kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa, + old_vpa.dirty); vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr; vcpu->arch.dtl_index = 0; } - if (vcpu->arch.slb_shadow.update_pending) - kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow); + if (vcpu->arch.slb_shadow.update_pending) { + kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow, &old_vpa); + if (old_vpa.pinned_addr) + kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa, + old_vpa.dirty); + } + spin_unlock(&vcpu->arch.vpa_update_lock); } 
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index fd3c4f2d9480..5378eb40b162 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -855,6 +855,35 @@ free_gsb: } EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_ptbl_entry); +/** + * kvmhv_nestedv2_set_vpa() - register L2 VPA with L0 + * @vcpu: vcpu + * @vpa: L1 logical real address + */ +int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa) +{ + struct kvmhv_nestedv2_io *io; + struct kvmppc_gs_buff *gsb; + int rc = 0; + + io = &vcpu->arch.nestedv2_io; + gsb = io->vcpu_run_input; + + kvmppc_gsb_reset(gsb); + rc = kvmppc_gse_put_u64(gsb, KVMPPC_GSID_VPA, vpa); + if (rc < 0) + goto out; + + rc = kvmppc_gsb_send(gsb, 0); + if (rc < 0) + pr_err("KVM-NESTEDv2: couldn't register the L2 VPA (rc=%d)\n", rc); + +out: + kvmppc_gsb_reset(gsb); + return rc; +} +EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_vpa); + /** * kvmhv_nestedv2_parse_output() - receive values from H_GUEST_RUN_VCPU output * @vcpu: vcpu -- cgit v1.2.3 From 797a5af8fc7297b19e5c6b1713956ebf1e6c1cde Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:16 +0530 Subject: KVM: PPC: Reduce reliance on analyse_instr() in mmio emulation Commit 709236039964 ("KVM: PPC: Reimplement non-SIMD LOAD/STORE instruction mmio emulation with analyse_instr() input") and commit 2b33cb585f94 ("KVM: PPC: Reimplement LOAD_FP/STORE_FP instruction mmio emulation with analyse_instr() input") made kvmppc_emulate_loadstore() use the results from analyse_instr() for instruction emulation. In particular the effective address from analyse_instr() is used for UPDATE type instructions and the fact that op.val is already endian-corrected is used in the STORE case. However, these changes now have some negative implications for the nestedv2 case. For analyse_instr() to determine the correct effective address, the GPRs must be loaded from the L0. This is not needed as vcpu->arch.vaddr_accessed is already set. Change back to using vcpu->arch.vaddr_accessed. In the STORE case, use the kvmppc_get_gpr() value instead of op.val. kvmppc_get_gpr() will reload from the L0 if needed in the nestedv2 case. This means whether a byte reversal is needed must now be passed to kvmppc_handle_store(), as in the kvmppc_handle_load() case. This also means the call to kvmhv_nestedv2_reload_ptregs() can be avoided as there is no concern about op.val being stale. Drop the call to kvmhv_nestedv2_mark_dirty_ptregs() as without the call to kvmhv_nestedv2_reload_ptregs(), stale state could be marked as valid. This is fine as the required dirty marking is already handled for the UPDATE case by the call to kvmppc_set_gpr(). For LOADs, it is handled in kvmppc_complete_mmio_load(). This is called either directly in __kvmppc_handle_load() if the load can be handled in KVM, or on the next kvm_arch_vcpu_ioctl_run() if an exit was required. 
Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-12-vaibhav@linux.ibm.com --- arch/powerpc/kvm/emulate_loadstore.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 077fd88a0b68..ec60c7979718 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -93,7 +93,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = EMULATE_FAIL; vcpu->arch.regs.msr = kvmppc_get_msr(vcpu); - kvmhv_nestedv2_reload_ptregs(vcpu, &vcpu->arch.regs); if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); @@ -112,7 +111,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) op.reg, size, !instr_byte_swap); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; } @@ -132,7 +131,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) KVM_MMIO_REG_FPR|op.reg, size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; #endif @@ -224,16 +223,17 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) break; } #endif - case STORE: - /* if need byte reverse, op.val has been reversed by - * analyse_instr(). - */ - emulated = kvmppc_handle_store(vcpu, op.val, size, 1); + case STORE: { + int instr_byte_swap = op.type & BYTEREV; + + emulated = kvmppc_handle_store(vcpu, kvmppc_get_gpr(vcpu, op.reg), + size, !instr_byte_swap); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; + } #ifdef CONFIG_PPC_FPU case STORE_FP: if (kvmppc_check_fp_disabled(vcpu)) @@ -254,7 +254,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) kvmppc_get_fpr(vcpu, op.reg), size, 1); if ((op.type & UPDATE) && (emulated != EMULATE_FAIL)) - kvmppc_set_gpr(vcpu, op.update_reg, op.ea); + kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed); break; #endif @@ -358,7 +358,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } trace_kvm_ppc_instr(ppc_inst_val(inst), kvmppc_get_pc(vcpu), emulated); - kvmhv_nestedv2_mark_dirty_ptregs(vcpu, &vcpu->arch.regs); /* Advance past emulated instruction. */ if (emulated != EMULATE_FAIL) -- cgit v1.2.3 From 180c6b072bf360b686e53d893d8dcf7dbbaec6bb Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Fri, 1 Dec 2023 18:56:17 +0530 Subject: KVM: PPC: Book3S HV nestedv2: Do not cancel pending decrementer exception In the nestedv2 case, if there is a pending decrementer exception, the L1 must get the L2's timebase from the L0 to see if the exception should be cancelled. This adds the overhead of a H_GUEST_GET_STATE call to the likely case in which the decrementer should not be cancelled. Avoid this logic for the nestedv2 case. 
Signed-off-by: Jordan Niethe Signed-off-by: Michael Ellerman Link: https://msgid.link/20231201132618.555031-13-vaibhav@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2ee3f2478570..e48126a59ba7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4834,7 +4834,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, * entering a nested guest in which case the decrementer is now owned * by L2 and the L1 decrementer is provided in hdec_expires */ - if (kvmppc_core_pending_dec(vcpu) && + if (!kvmhv_is_nestedv2() && kvmppc_core_pending_dec(vcpu) && ((tb < kvmppc_dec_expires_host_tb(vcpu)) || (trap == BOOK3S_INTERRUPT_SYSCALL && kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) -- cgit v1.2.3 From 773b93f1d1c38c5c0d5308b8c9229c7a6ec5b2a0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (IBM)" Date: Mon, 4 Dec 2023 15:06:37 +0530 Subject: powerpc/book3s/hash: Drop _PAGE_PRIVILEGED from PAGE_NONE There used to be a dependency on _PAGE_PRIVILEGED with pte_savedwrite. But that got dropped by commit 6a56ccbcf6c6 ("mm/autonuma: use can_change_(pte|pmd)_writable() to replace savedwrite") With the change in this patch numa fault pte (pte_protnone()) gets mapped as regular user pte with RWX cleared (no-access) whereas earlier it used to be mapped _PAGE_PRIVILEGED. Hash fault handling code gets some WARN_ON added in this patch because those functions are not expected to get called with _PAGE_READ cleared. commit 18061c17c8ec ("powerpc/mm: Update PROTFAULT handling in the page fault path") explains the details. Signed-off-by: "Aneesh Kumar K.V (IBM)" Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20231204093638.71503-1-aneesh.kumar@kernel.org --- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++-------- arch/powerpc/mm/book3s64/hash_utils.c | 7 +++++++ 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index cb77eddca54b..927d585652bc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -17,12 +17,6 @@ #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ #define _PAGE_READ 0x00004 /* read access allowed */ -#define _PAGE_NA _PAGE_PRIVILEGED -#define _PAGE_NAX _PAGE_EXEC -#define _PAGE_RO _PAGE_READ -#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC) -#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) -#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ #define _PAGE_SAO 0x00010 /* Strong access order */ #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ @@ -532,8 +526,8 @@ static inline bool pte_user(pte_t pte) static inline bool pte_access_permitted(pte_t pte, bool write) { /* - * _PAGE_READ is needed for any access and will be - * cleared for PROT_NONE + * _PAGE_READ is needed for any access and will be cleared for + * PROT_NONE. Execute-only mapping via PROT_EXEC also returns false. 
*/ if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) return false; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index ad2afa08e62e..0626a25b0d72 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -310,9 +310,16 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags else rflags |= 0x3; } + VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); } else { if (pteflags & _PAGE_RWX) rflags |= 0x2; + /* + * We should never hit this in normal fault handling because + * a permission check (check_pte_access()) will bubble this + * to higher level linux handler even for PAGE_NONE. + */ + VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY))) rflags |= 0x1; } -- cgit v1.2.3 From a59c14f6b4caad7671dfb81737beba0b313897e4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (IBM)" Date: Mon, 4 Dec 2023 15:06:38 +0530 Subject: powerpc/book3s64: Avoid __pte_protnone() check in __pte_flags_need_flush() This reverts commit 1abce0580b89 ("powerpc/64s: Fix __pte_needs_flush() false positive warning") The previous patch dropped the usage of _PAGE_PRIVILEGED with PAGE_NONE. Hence this check can be dropped. Signed-off-by: "Aneesh Kumar K.V (IBM)" Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20231204093638.71503-2-aneesh.kumar@kernel.org --- arch/powerpc/include/asm/book3s/64/tlbflush.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 1950c1b825b4..fd642b729775 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -158,11 +158,6 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, */ } -static inline bool __pte_protnone(unsigned long pte) -{ - return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE); -} - static inline bool __pte_flags_need_flush(unsigned long oldval, unsigned long newval) { @@ -179,8 +174,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval, /* * We do not expect kernel mappings or non-PTEs or not-present PTEs. */ - VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED); - VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED); VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(newval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT)); -- cgit v1.2.3 From dc420877b5bd92db5d80df6b117c7a0f987290af Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Dec 2023 22:55:45 +1100 Subject: powerpc/Makefile: Don't use $(ARCH) unnecessarily There's no need to use $(ARCH) for references to the arch directory in the source tree, it is always arch/powerpc. 
Signed-off-by: Michael Ellerman Link: https://msgid.link/20231206115548.1466874-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f19dbaa1d541..b0bc17c35ed7 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -161,7 +161,7 @@ CFLAGS-y += $(CONFIG_TUNE_CPU) asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) -KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr) +KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr) KBUILD_AFLAGS += $(AFLAGS-y) KBUILD_CFLAGS += $(call cc-option,-msoft-float) KBUILD_CFLAGS += $(CFLAGS-y) @@ -232,7 +232,7 @@ BOOT_TARGETS2 := zImage% dtbImage% treeImage.% cuImage.% simpleImage.% uImage.% PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2) -boot := arch/$(ARCH)/boot +boot := arch/powerpc/boot $(BOOT_TARGETS1): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) @@ -336,7 +336,7 @@ PHONY += $(generated_configs) define archhelp echo '* zImage - Build default images selected by kernel config' - echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' + echo ' zImage.* - Compressed kernel image (arch/powerpc/boot/zImage.*)' echo ' uImage - U-Boot native image format' echo ' cuImage.<dt>
- Backwards compatible U-Boot image for older' echo ' versions which do not support device trees' @@ -347,12 +347,12 @@ define archhelp echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' - echo ' *_defconfig - Select default config from arch/$(ARCH)/configs' + echo ' *_defconfig - Select default config from arch/powerpc/configs' echo '' echo ' Targets with <dt>
embed a device tree blob inside the image' echo ' These targets support board with firmware that does not' echo ' support passing a device tree directly. Replace <dt>
with the' - echo ' name of a dts file from the arch/$(ARCH)/boot/dts/ directory' + echo ' name of a dts file from the arch/powerpc/boot/dts/ directory' echo ' (minus the .dts extension).' echo $(foreach cfg,$(generated_configs), -- cgit v1.2.3 From 42449052c94f22e18e01d71147d8fd75cb58132a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Dec 2023 22:55:46 +1100 Subject: powerpc/vdso: No need to undef powerpc for 64-bit build The vdso Makefile adds -U$(ARCH) to CPPFLAGS for the vdso64.lds linker script. ARCH is always powerpc, so it becomes -Upowerpc, which means undefine the "powerpc" symbol. But the 64-bit compiler doesn't define powerpc in the first place, compare: $ gcc-5.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-gcc -m32 -E -dM - Link: https://msgid.link/20231206115548.1466874-2-mpe@ellerman.id.au --- arch/powerpc/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 0c7d82c270c3..1b93655c2857 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -71,7 +71,7 @@ AS64FLAGS := -D__VDSO64__ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE -- cgit v1.2.3 From 22f17b02f88b48c01d3ac38d40d2b0b695ab2d10 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Dec 2023 22:55:47 +1100 Subject: powerpc/Makefile: Default to ppc64le_defconfig when cross building If the kernel is being cross compiled, there is no information from uname on which defconfig is most appropriate, so the Makefile defaults to ppc64. However these days almost all distros that support powerpc are little endian, so it's more likely that defaulting to ppc64le_defconfig will produce something useful for a user. Signed-off-by: Michael Ellerman Link: https://msgid.link/20231206115548.1466874-3-mpe@ellerman.id.au --- arch/powerpc/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index b0bc17c35ed7..48c06f5a0dc1 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -16,9 +16,9 @@ HAS_BIARCH := $(call cc-option-yn, -m32) CROSS32_COMPILE ?= # If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use -# ppc64_defconfig because we have nothing better to go on. +# ppc64le_defconfig because we have nothing better to go on. uname := $(shell uname -m) -KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig +KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64le)_defconfig new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) -- cgit v1.2.3 From 402928b58ec62b42b11992a7b51ff2f56ed65a18 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Dec 2023 22:55:48 +1100 Subject: powerpc/Makefile: Auto detect cross compiler If no cross compiler is specified, try to auto detect one. Look for various combinations, matching: powerpc(64(le)?)?(-unknown)?-linux(-gnu)?- There are more possibilities, but the above is known to find a compiler on Fedora and Ubuntu (which use linux-gnu-), and also detects the kernel.org cross compilers (which use linux-). 
This allows cross compiling with simply: # Ubuntu $ sudo apt install gcc-powerpc-linux-gnu # Fedora $ sudo dnf install gcc-powerpc64-linux-gnu $ make ARCH=powerpc defconfig $ make ARCH=powerpc -j 4 Inspired by arch/parisc/Makefile. Acked-by: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://msgid.link/20231206115548.1466874-4-mpe@ellerman.id.au --- arch/powerpc/Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 48c06f5a0dc1..051247027da0 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -10,6 +10,17 @@ # Rewritten by Cort Dougan and Paul Mackerras # +ifdef cross_compiling + ifeq ($(CROSS_COMPILE),) + # Auto detect cross compiler prefix. + # Look for: (powerpc(64(le)?)?)(-unknown)?-linux(-gnu)?- + CC_ARCHES := powerpc powerpc64 powerpc64le + CC_SUFFIXES := linux linux-gnu unknown-linux-gnu + CROSS_COMPILE := $(call cc-cross-prefix, $(foreach a,$(CC_ARCHES), \ + $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) + endif +endif + HAS_BIARCH := $(call cc-option-yn, -m32) # Set default 32 bits cross compilers for vdso and boot wrapper -- cgit v1.2.3 From 070b71f428facd9130319707db854ed8bd24637a Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 16 Nov 2023 17:50:32 +0530 Subject: powerpc/hv-gpci: Add return value check in affinity_domain_via_partition_show function To access hv-gpci kernel interface file data, the "Enable Performance Information Collection" option has to be set in the HMC. If that option is not set and a user tries to read the interface files, it should give an "Operation not permitted" error. Result of accessing the added interface files with the performance collection option disabled: [command]# cat processor_bus_topology cat: processor_bus_topology: Operation not permitted [command]# cat processor_config cat: processor_config: Operation not permitted [command]# cat affinity_domain_via_domain cat: affinity_domain_via_domain: Operation not permitted [command]# cat affinity_domain_via_virtual_processor cat: affinity_domain_via_virtual_processor: Operation not permitted [command]# cat affinity_domain_via_partition Based on the above result, there is no error message when reading the affinity_domain_via_partition file, because of a missing check for a failed hcall. Fix this issue by adding a check at the start of the affinity_domain_via_partition_show function to return an error in case the hcall fails with an error type other than H_PARAMETER. 
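The corrected control flow as a standalone sketch (the return codes are illustrative stand-ins for the hvcall values):

#include <stdio.h>

#define H_SUCCESS	0L
#define H_PARAMETER	(-4L)	/* partial buffer: grow and retry */
#define H_AUTHORITY	(-9L)	/* e.g. collection not enabled */

static long fake_hcall(void)
{
	return H_AUTHORITY;
}

int main(void)
{
	long ret = fake_hcall();

	if (!ret)
		goto parse_result;

	/* The check this patch adds: anything other than H_PARAMETER
	 * is a real failure, not a request for a bigger buffer. */
	if (ret && (ret != H_PARAMETER))
		goto out;

	/* H_PARAMETER path: enlarge the buffer and retry ... */
	return 0;

parse_result:
	printf("parse the returned counter data\n");
	return 0;
out:
	fprintf(stderr, "hcall failed (%ld): Operation not permitted\n", ret);
	return 1;
}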
Fixes: a15e0d6a6929 ("powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via partition information") Reported-by: Disha Goel Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20231116122033.160964-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 39dbe6b348df..27f18119fda1 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -534,6 +534,9 @@ static ssize_t affinity_domain_via_partition_show(struct device *dev, struct dev if (!ret) goto parse_result; + if (ret && (ret != H_PARAMETER)) + goto out; + /* * ret value as 'H_PARAMETER' implies that the current buffer size * can't accommodate all the information, and a partial buffer -- cgit v1.2.3 From 01e346ffefda3a7088afebf02b940614179688e7 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:48 -0600 Subject: powerpc/rtas: Avoid warning on invalid token argument to sys_rtas() rtas_token_to_function() WARNs when passed an invalid token; it's meant to catch bugs in kernel-based users of RTAS functions. However, user space controls the token value passed to rtas_token_to_function() by block_rtas_call(), so user space with sufficient privilege to use sys_rtas() can trigger the warnings at will: unexpected failed lookup for token 2048 WARNING: CPU: 20 PID: 2247 at arch/powerpc/kernel/rtas.c:556 rtas_token_to_function+0xfc/0x110 ... NIP rtas_token_to_function+0xfc/0x110 LR rtas_token_to_function+0xf8/0x110 Call Trace: rtas_token_to_function+0xf8/0x110 (unreliable) sys_rtas+0x188/0x880 system_call_exception+0x268/0x530 system_call_common+0x160/0x2c4 It's desirable to continue warning on bogus tokens in rtas_token_to_function(). Currently it is used to look up RTAS function descriptors when tracing, where we know there has to have been a successful descriptor lookup by different means already, and it would be a serious inconsistency for the reverse lookup to fail. So instead of weakening rtas_token_to_function()'s contract by removing the warnings, introduce rtas_token_to_function_untrusted(), which has no opinion on failed lookups. Convert block_rtas_call() and rtas_token_to_function() to use it. Fixes: 8252b88294d2 ("powerpc/rtas: improve function information lookups") Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-1-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c49f078382a9..ce37dc9860ef 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -544,6 +544,21 @@ static int __init rtas_token_to_function_xarray_init(void) } arch_initcall(rtas_token_to_function_xarray_init); +/* + * For use by sys_rtas(), where the token value is provided by user + * space and we don't want to warn on failed lookups. + */ +static const struct rtas_function *rtas_token_to_function_untrusted(s32 token) +{ + return xa_load(&rtas_token_to_function_xarray, token); +} + +/* + * Reverse lookup for deriving the function descriptor from a + * known-good token value in contexts where the former is not already + * available. @token must be valid, e.g. derived from the result of a + * prior lookup against the function table. 
+ */ static const struct rtas_function *rtas_token_to_function(s32 token) { const struct rtas_function *func; @@ -551,7 +566,7 @@ static const struct rtas_function *rtas_token_to_function(s32 token) if (WARN_ONCE(token < 0, "invalid token %d", token)) return NULL; - func = xa_load(&rtas_token_to_function_xarray, token); + func = rtas_token_to_function_untrusted(token); if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token)) return NULL; @@ -1721,7 +1736,7 @@ static bool block_rtas_call(int token, int nargs, * If this token doesn't correspond to a function the kernel * understands, you're not allowed to call it. */ - func = rtas_token_to_function(token); + func = rtas_token_to_function_untrusted(token); if (!func) goto err; /* -- cgit v1.2.3 From c500c6e736df030f8956080738f59701c0b43dd8 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:49 -0600 Subject: powerpc/rtas: Add for_each_rtas_function() iterator Add a convenience macro for iterating over every element of the internal function table and convert the one site that can use it. An additional user of the macro is anticipated in changes to follow. Reviewed-by: "Aneesh Kumar K.V (IBM)" Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-2-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ce37dc9860ef..ae9b10c954a1 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -454,6 +454,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { }, }; +#define for_each_rtas_function(funcp) \ + for (funcp = &rtas_function_table[0]; \ + funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \ + ++funcp) + /* * Nearly all RTAS calls need to be serialized. All uses of the * default rtas_args block must hold rtas_lock. @@ -525,10 +530,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray); static int __init rtas_token_to_function_xarray_init(void) { + const struct rtas_function *func; int err = 0; - for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { - const struct rtas_function *func = &rtas_function_table[i]; + for_each_rtas_function(func) { const s32 token = func->token; if (token == RTAS_UNKNOWN_SERVICE) -- cgit v1.2.3 From 669acc7eec223a81ea5e2420de85b61979ab7dad Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:50 -0600 Subject: powerpc/rtas: Fall back to linear search on failed token->function lookup Enabling any of the powerpc:rtas_* tracepoints at boot is likely to result in an oops on RTAS platforms. For example, booting a QEMU pseries model with 'trace_event=powerpc:rtas_input' in the command line leads to: BUG: Kernel NULL pointer dereference on read at 0x00000008 Oops: Kernel access of bad area, sig: 7 [#1] NIP [c00000000004231c] do_enter_rtas+0x1bc/0x460 LR [c00000000004231c] do_enter_rtas+0x1bc/0x460 Call Trace: do_enter_rtas+0x1bc/0x460 (unreliable) rtas_call+0x22c/0x4a0 rtas_get_boot_time+0x80/0x14c read_persistent_clock64+0x124/0x150 read_persistent_wall_and_boot_offset+0x28/0x58 timekeeping_init+0x70/0x348 start_kernel+0xa0c/0xc1c start_here_common+0x1c/0x20 (This is preceded by a warning for the failed lookup in rtas_token_to_function().) This happens when __do_enter_rtas_trace() attempts a token to function descriptor lookup before the xarray containing the mappings has been set up. 
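In other words, at this point in boot the xarray is empty, the lookup returns NULL, and the tracing path dereferences the NULL descriptor for its name. A minimal sketch of the failing path, based on the pre-fix __do_enter_rtas_trace():

  /* rtas_token_to_function_xarray has not been populated yet */
  const struct rtas_function *func = rtas_token_to_function(token);

  name = func->name; /* func is NULL; reads the name pointer at offset 8 */

which lines up with the faulting address 0x00000008 in the oops above.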
Fall back to linear scan of the table if rtas_token_to_function_xarray is empty. Fixes: 24098f580e2b ("powerpc/rtas: add tracepoints around RTAS entry") Reviewed-by: "Aneesh Kumar K.V (IBM)" Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-3-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ae9b10c954a1..f60a8e7bd5ed 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -572,11 +572,21 @@ static const struct rtas_function *rtas_token_to_function(s32 token) return NULL; func = rtas_token_to_function_untrusted(token); + if (func) + return func; + /* + * Fall back to linear scan in case the reverse mapping hasn't + * been initialized yet. + */ + if (xa_empty(&rtas_token_to_function_xarray)) { + for_each_rtas_function(func) { + if (func->token == token) + return func; + } + } - if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token)) - return NULL; - - return func; + WARN_ONCE(true, "unexpected failed lookup for token %d", token); + return NULL; } /* This is here deliberately so it's only used in this file */ -- cgit v1.2.3 From 9592aa5ad59e736727fe7894e6e820e2d851abcf Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:51 -0600 Subject: powerpc/rtas: Add function return status constants Not all of the generic RTAS function statuses specified in PAPR have symbolic constants and descriptions in rtas.h. Fix this, providing a little more background, slightly updating the existing wording, and improving the formatting. Reviewed-by: "Aneesh Kumar K.V (IBM)" Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-4-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index a7110ed52e25..08d19e6904f7 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -201,12 +201,25 @@ typedef struct { /* Memory set aside for sys_rtas to use with calls that need a work area. */ #define RTAS_USER_REGION_SIZE (64 * 1024) -/* RTAS return status codes */ -#define RTAS_HARDWARE_ERROR -1 /* Hardware Error */ -#define RTAS_BUSY -2 /* RTAS Busy */ -#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */ -#define RTAS_EXTENDED_DELAY_MIN 9900 -#define RTAS_EXTENDED_DELAY_MAX 9905 +/* + * Common RTAS function return values, derived from the table "RTAS + * Status Word Values" in PAPR+ v2.13 7.2.8: "Return Codes". If a + * function can return a value in this table then generally it has the + * meaning listed here. More extended commentary in the documentation + * for rtas_call(). + * + * RTAS functions may use negative and positive numbers not in this + * set for function-specific error and success conditions, + * respectively. + */ +#define RTAS_SUCCESS 0 /* Success. */ +#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified error. */ +#define RTAS_BUSY -2 /* Retry immediately. */ +#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */ +#define RTAS_UNEXPECTED_STATE_CHANGE -7 /* Seems limited to EEH and slot reset. */ +#define RTAS_EXTENDED_DELAY_MIN 9900 /* Retry after delaying for ~1ms. 
*/ +#define RTAS_EXTENDED_DELAY_MAX 9905 /* Retry after delaying for ~100s. */ +#define RTAS_ML_ISOLATION_ERROR -9000 /* Multi-level isolation error. */ /* statuses specific to ibm,suspend-me */ #define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */ -- cgit v1.2.3 From e7582edb78619abb4ebf0a6e1fed125dcd7243b6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:52 -0600 Subject: powerpc/rtas: Move token validation from block_rtas_call() to sys_rtas() The rtas system call handler sys_rtas() delegates certain input validation steps to a helper function: block_rtas_call(). One of these steps ensures that the user-supplied token value maps to a known RTAS function. This is done by performing a "reverse" token-to-function lookup via rtas_token_to_function_untrusted() to obtain an rtas_function object. In changes to come, sys_rtas() itself will need the function descriptor for the token. To prepare: * Move the lookup and validation up into sys_rtas() and pass the resulting rtas_function pointer to block_rtas_call(), which is otherwise unconcerned with the token value. * Change block_rtas_call() to report the RTAS function name instead of the token value on validation failures, since it can now rely on having a valid function descriptor. One behavior change is that sys_rtas() now silently errors out when passed a bad token, before calling block_rtas_call(). So we will no longer log "RTAS call blocked - exploit attempt?" on invalid tokens. This is consistent with how sys_rtas() currently handles other "metadata" (nargs and nret), while block_rtas_call() is primarily concerned with validating the arguments to be passed to specific RTAS functions. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-5-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index f60a8e7bd5ed..ca5bb0b994ac 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1738,24 +1738,18 @@ static bool in_rmo_buf(u32 base, u32 end) end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } -static bool block_rtas_call(int token, int nargs, +static bool block_rtas_call(const struct rtas_function *func, int nargs, struct rtas_args *args) { - const struct rtas_function *func; const struct rtas_filter *f; - const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP); - const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR); + const bool is_platform_dump = + func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP]; + const bool is_config_conn = + func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR]; u32 base, size, end; /* - * If this token doesn't correspond to a function the kernel - * understands, you're not allowed to call it. - */ - func = rtas_token_to_function_untrusted(token); - if (!func) - goto err; - /* - * And only functions with filters attached are allowed. + * Only functions with filters attached are allowed. 
*/ f = func->filter; if (!f) @@ -1812,14 +1806,15 @@ static bool block_rtas_call(int token, int nargs, return false; err: pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); - pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", - token, nargs, current->comm); + pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n", + func->name, nargs, current->comm); return true; } /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { + const struct rtas_function *func; struct pin_cookie cookie; struct rtas_args args; unsigned long flags; @@ -1849,13 +1844,18 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) nargs * sizeof(rtas_arg_t)) != 0) return -EFAULT; - if (token == RTAS_UNKNOWN_SERVICE) + /* + * If this token doesn't correspond to a function the kernel + * understands, you're not allowed to call it. + */ + func = rtas_token_to_function_untrusted(token); + if (!func) return -EINVAL; args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); - if (block_rtas_call(token, nargs, &args)) + if (block_rtas_call(func, nargs, &args)) return -EINVAL; if (token_is_restricted_errinjct(token)) { -- cgit v1.2.3 From adf7a019e5f82607fc0f0079926d0178afe8f4ef Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:53 -0600 Subject: powerpc/rtas: Facilitate high-level call sequences On RTAS platforms there is a general restriction that the OS must not enter RTAS on more than one CPU at a time. This low-level serialization requirement is satisfied by holding a spin lock (rtas_lock) across most RTAS function invocations. However, some pseries RTAS functions require multiple successive calls to complete a logical operation. Beginning a new call sequence for such a function may disrupt any other sequences of that function already in progress. Safe and reliable use of these functions effectively requires higher-level serialization beyond what is already done at the level of RTAS entry and exit. Where a sequence-based RTAS function is invoked only through sys_rtas(), with no in-kernel users, there is no issue as far as the kernel is concerned. User space is responsible for appropriately serializing its call sequences. (Whether user space code actually takes measures to prevent sequence interleaving is another matter.) Examples of such functions currently include ibm,platform-dump and ibm,get-vpd. But where a sequence-based RTAS function has both user space and in-kernel users, there is a hazard. Even if the in-kernel call sites of such a function serialize their sequences correctly, a user of sys_rtas() can invoke the same function at any time, potentially disrupting a sequence in progress. So in order to prevent disruption of kernel-based RTAS call sequences, they must serialize not only with themselves but also with sys_rtas() users, somehow. Preferably without adding more function-specific hacks to sys_rtas(). This is a prerequisite for adding an in-kernel call sequence of ibm,get-vpd, which is in a change to follow. Note that it has never been feasible for the kernel to prevent sys_rtas()-based sequences from being disrupted because control returns to user space on every call. sys_rtas()-based users of these functions have always been, and continue to be, responsible for coordinating their call sequences with other users, even those which may invoke the RTAS functions through less direct means than sys_rtas().
This is an unavoidable consequence of exposing sequence-based RTAS functions through sys_rtas(). * Add an optional mutex member to struct rtas_function. * Statically define a mutex for each RTAS function with known call sequence serialization requirements, and assign its address to the .lock member of the corresponding function table entry, along with justifying commentary. * In sys_rtas(), if the table entry for the RTAS function being called has a populated lock member, acquire it before taking rtas_lock and entering RTAS. * Kernel-based RTAS call sequences are expected to access the appropriate mutex explicitly by name. For example, a user of the ibm,activate-firmware RTAS function would do: int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE); int fwrc; mutex_lock(&rtas_ibm_activate_firmware_lock); do { fwrc = rtas_call(token, 0, 1, NULL); } while (rtas_busy_delay(fwrc)); mutex_unlock(&rtas_ibm_activate_firmware_lock); There should be no perceivable change introduced here except that concurrent callers of the same RTAS function via sys_rtas() may block on a mutex instead of spinning on rtas_lock. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-6-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 3 ++ arch/powerpc/kernel/rtas.c | 83 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 08d19e6904f7..9bb2210c8d44 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -3,6 +3,7 @@ #define _POWERPC_RTAS_H #ifdef __KERNEL__ +#include #include #include #include @@ -512,6 +513,8 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE]; /* RMO buffer reserved for user-space RTAS use */ extern unsigned long rtas_rmo_buf; +extern struct mutex rtas_ibm_get_vpd_lock; + #define GLOBAL_INTERRUPT_QUEUE 9005 /** diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ca5bb0b994ac..4d28983e8b1d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -70,14 +71,33 @@ struct rtas_filter { * ppc64le, and we want to keep it that way. It does * not make sense for this to be set when @filter * is NULL. + * @lock: Pointer to an optional dedicated per-function mutex. This + * should be set for functions that require multiple calls in + * sequence to complete a single operation, and such sequences + * will disrupt each other if allowed to interleave. Users of + * this function are required to hold the associated lock for + * the duration of the call sequence. Add an explanatory + * comment to the function table entry if setting this member. */ struct rtas_function { s32 token; const bool banned_for_syscall_on_le:1; const char * const name; const struct rtas_filter *filter; + struct mutex *lock; }; +/* + * Per-function locks for sequence-based RTAS functions. 
+ */ +static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock); +static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock); +static DEFINE_MUTEX(rtas_ibm_get_indices_lock); +static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock); +static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock); +static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock); +DEFINE_MUTEX(rtas_ibm_get_vpd_lock); + static struct rtas_function rtas_function_table[] __ro_after_init = { [RTAS_FNIDX__CHECK_EXCEPTION] = { .name = "check-exception", @@ -125,6 +145,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = -1, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ as of v2.13 doesn't explicitly impose any + * restriction, but this typically requires multiple + * calls before success, and there's no reason to + * allow sequences to interleave. + */ + .lock = &rtas_ibm_activate_firmware_lock, }, [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { .name = "ibm,cbe-start-ptcal", @@ -196,6 +223,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 1, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS + * must not call ibm,get-dynamic-sensor-state with + * different inputs until a non-retry status has been + * returned. + */ + .lock = &rtas_ibm_get_dynamic_sensor_state_lock, }, [RTAS_FNIDX__IBM_GET_INDICES] = { .name = "ibm,get-indices", @@ -203,6 +237,12 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = 3, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not + * interleave ibm,get-indices call sequences with + * different inputs. + */ + .lock = &rtas_ibm_get_indices_lock, }, [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { .name = "ibm,get-rio-topology", @@ -220,6 +260,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 0, .size_idx1 = -1, .buf_idx2 = 1, .size_idx2 = 2, }, + /* + * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences + * should not be allowed to interleave. + */ + .lock = &rtas_ibm_get_vpd_lock, }, [RTAS_FNIDX__IBM_GET_XIVE] = { .name = "ibm,get-xive", @@ -239,6 +284,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = 3, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow + * only one call sequence in progress at a time. + */ + .lock = &rtas_ibm_lpar_perftools_lock, }, [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { .name = "ibm,manage-flash-image", @@ -277,6 +327,14 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 0, .size_idx1 = 1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * This follows a sequence-based pattern similar to + * ibm,get-vpd et al. Since PAPR+ restricts + * interleaving call sequences for other functions of + * this style, assume the restriction applies here, + * even though it's not explicit in the spec. + */ + .lock = &rtas_ibm_physical_attestation_lock, }, [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { .name = "ibm,platform-dump", @@ -284,6 +342,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 4, .size_idx1 = 5, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent + * sequences of ibm,platform-dump are allowed if they + * are operating on different dump tags. So leave the + * lock pointer unset for now. 
This may need + * reconsideration if kernel-internal users appear. + */ }, [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { .name = "ibm,power-off-ups", @@ -326,6 +391,12 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call + * this function with different inputs until a + * non-retry status has been returned. + */ + .lock = &rtas_ibm_set_dynamic_indicator_lock, }, [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { .name = "ibm,set-eeh-option", @@ -1888,6 +1959,15 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); + /* + * If this function has a mutex assigned to it, we must + * acquire it to avoid interleaving with any kernel-based uses + * of the same function. Kernel-based sequences acquire the + * appropriate mutex explicitly. + */ + if (func->lock) + mutex_lock(func->lock); + raw_spin_lock_irqsave(&rtas_lock, flags); cookie = lockdep_pin_lock(&rtas_lock); @@ -1903,6 +1983,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) lockdep_unpin_lock(&rtas_lock, cookie); raw_spin_unlock_irqrestore(&rtas_lock, flags); + if (func->lock) + mutex_unlock(func->lock); + if (buff_copy) { if (errbuf) log_error(errbuf, ERR_TYPE_RTAS_LOG, 0); -- cgit v1.2.3 From dc7637c402b90a197d3f21a3d78f2b00b67ea22a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:54 -0600 Subject: powerpc/rtas: Serialize firmware activation sequences Use rtas_ibm_activate_firmware_lock to prevent interleaving call sequences of the ibm,activate-firmware RTAS function, which typically requires multiple calls to complete the update. While the spec does not specifically prohibit interleaved sequences, there's almost certainly no advantage to allowing them. Reviewed-by: "Aneesh Kumar K.V (IBM)" Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-7-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4d28983e8b1d..72f6b5a402dd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1734,10 +1734,14 @@ void rtas_activate_firmware(void) return; } + mutex_lock(&rtas_ibm_activate_firmware_lock); + do { fwrc = rtas_call(token, 0, 1, NULL); } while (rtas_busy_delay(fwrc)); + mutex_unlock(&rtas_ibm_activate_firmware_lock); + if (fwrc) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } -- cgit v1.2.3 From e3681107bc9f97c5948a1c8a3a97ac64907210ce Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:55 -0600 Subject: powerpc/rtas: Warn if per-function lock isn't held If the function descriptor has a populated lock member, then callers are required to hold it across calls. Now that the firmware activation sequence is appropriately guarded, we can warn when the requirement isn't satisfied. __do_enter_rtas_trace() gets reorganized a bit as a result of performing the function descriptor lookup unconditionally now. 
Reviewed-by: "Aneesh Kumar K.V (IBM)" Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-8-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 72f6b5a402dd..7e793b503e29 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -671,28 +671,25 @@ static void __do_enter_rtas(struct rtas_args *args) static void __do_enter_rtas_trace(struct rtas_args *args) { - const char *name = NULL; + const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token)); - if (args == &rtas_args) - lockdep_assert_held(&rtas_lock); /* - * If the tracepoints that consume the function name aren't - * active, avoid the lookup. + * If there is a per-function lock, it must be held by the + * caller. */ - if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) { - const s32 token = be32_to_cpu(args->token); - const struct rtas_function *func = rtas_token_to_function(token); + if (func->lock) + lockdep_assert_held(func->lock); - name = func->name; - } + if (args == &rtas_args) + lockdep_assert_held(&rtas_lock); - trace_rtas_input(args, name); + trace_rtas_input(args, func->name); trace_rtas_ll_entry(args); __do_enter_rtas(args); trace_rtas_ll_exit(args); - trace_rtas_output(args, name); + trace_rtas_output(args, func->name); } static void do_enter_rtas(struct rtas_args *args) -- cgit v1.2.3 From 514f6ff4369a30bf0da71a1a09fd47b2fca5d76f Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:56 -0600 Subject: powerpc/pseries: Add papr-vpd character driver for VPD retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PowerVM LPARs may retrieve Vital Product Data (VPD) for system components using the ibm,get-vpd RTAS function. We can expose this to user space with a /dev/papr-vpd character device, where the programming model is: struct papr_location_code plc = { .str = "", }; /* obtain all VPD */ int devfd = open("/dev/papr-vpd", O_RDONLY); int vpdfd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &plc); size_t size = lseek(vpdfd, 0, SEEK_END); char *buf = malloc(size); pread(vpdfd, buf, size, 0); When a file descriptor is obtained from ioctl(PAPR_VPD_IOC_CREATE_HANDLE), the file contains the result of a complete ibm,get-vpd sequence. The file contents are immutable from the POV of user space. To get a new view of the VPD, the client must create a new handle. This design choice insulates user space from most of the complexities that ibm,get-vpd brings: * ibm,get-vpd must be called more than once to obtain complete results. * Only one ibm,get-vpd call sequence should be in progress at a time; interleaved sequences will disrupt each other. Callers must have a protocol for serializing their use of the function. * A call sequence in progress may receive a "VPD changed, try again" status, requiring the client to abandon the sequence and start over. The memory required for the VPD buffers seems acceptable, around 20KB for all VPD on one of my systems. And the value of the /rtas/ibm,vpd-size DT property (the estimated maximum size of VPD) is consistently 300KB across various systems I've checked. I've implemented support for this new ABI in the rtas_get_vpd() function in librtas, which the vpdupdate command currently uses to populate its VPD database.
I've verified that an unmodified vpdupdate binary generates an identical database when using a librtas.so that prefers the new ABI. Along with the papr-vpd.h header exposed to user space, this introduces a common papr-miscdev.h uapi header to share a base ioctl ID with similar drivers to come. Tested-by: Michal Suchánek Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-9-e9eafd0c8c6c@linux.ibm.com --- Documentation/userspace-api/ioctl/ioctl-number.rst | 2 + arch/powerpc/include/uapi/asm/papr-miscdev.h | 9 + arch/powerpc/include/uapi/asm/papr-vpd.h | 22 + arch/powerpc/platforms/pseries/Makefile | 1 + arch/powerpc/platforms/pseries/papr-vpd.c | 541 +++++++++++++++++++++ 5 files changed, 575 insertions(+) create mode 100644 arch/powerpc/include/uapi/asm/papr-miscdev.h create mode 100644 arch/powerpc/include/uapi/asm/papr-vpd.h create mode 100644 arch/powerpc/platforms/pseries/papr-vpd.c (limited to 'arch') diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 4ea5b837399a..a950545bf7cd 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -349,6 +349,8 @@ Code Seq# Include File Comments 0xB1 00-1F PPPoX +0xB2 00 arch/powerpc/include/uapi/asm/papr-vpd.h powerpc/pseries VPD API + 0xB3 00 linux/mmc/ioctl.h 0xB4 00-0F linux/gpio.h 0xB5 00-0F uapi/linux/rpmsg.h diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h b/arch/powerpc/include/uapi/asm/papr-miscdev.h new file mode 100644 index 000000000000..49a2a270b7f3 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_MISCDEV_H_ +#define _UAPI_PAPR_MISCDEV_H_ + +enum { + PAPR_MISCDEV_IOC_ID = 0xb2, +}; + +#endif /* _UAPI_PAPR_MISCDEV_H_ */ diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h b/arch/powerpc/include/uapi/asm/papr-vpd.h new file mode 100644 index 000000000000..1c88e87cb420 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-vpd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_VPD_H_ +#define _UAPI_PAPR_VPD_H_ + +#include +#include + +struct papr_location_code { + /* + * PAPR+ v2.13 12.3.2.4 Converged Location Code Rules - Length + * Restrictions. 79 characters plus nul. + */ + char str[80]; +}; + +/* + * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to + * the location code. 
+ */ +#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct papr_location_code) + +#endif /* _UAPI_PAPR_VPD_H_ */ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 1476c5e4433c..f936962a2946 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o rtas-work-area.o papr-sysparm.o \ + papr-vpd.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o \ diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c new file mode 100644 index 000000000000..c29e85db5f35 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-vpd.c @@ -0,0 +1,541 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-vpd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Function-specific return values for ibm,get-vpd, derived from PAPR+ + * v2.13 7.3.20 "ibm,get-vpd RTAS Call". + */ +#define RTAS_IBM_GET_VPD_COMPLETE 0 /* All VPD has been retrieved. */ +#define RTAS_IBM_GET_VPD_MORE_DATA 1 /* More VPD is available. */ +#define RTAS_IBM_GET_VPD_START_OVER -4 /* VPD changed, restart call sequence. */ + +/** + * struct rtas_ibm_get_vpd_params - Parameters (in and out) for ibm,get-vpd. + * @loc_code: In: Caller-provided location code buffer. Must be RTAS-addressable. + * @work_area: In: Caller-provided work area buffer for results. + * @sequence: In: Sequence number. Out: Next sequence number. + * @written: Out: Bytes written by ibm,get-vpd to @work_area. + * @status: Out: RTAS call status. + */ +struct rtas_ibm_get_vpd_params { + const struct papr_location_code *loc_code; + struct rtas_work_area *work_area; + u32 sequence; + u32 written; + s32 status; +}; + +/** + * rtas_ibm_get_vpd() - Call ibm,get-vpd to fill a work area buffer. + * @params: See &struct rtas_ibm_get_vpd_params. + * + * Calls ibm,get-vpd until it errors or successfully deposits data + * into the supplied work area. Handles RTAS retry statuses. Maps RTAS + * error statuses to reasonable errno values. + * + * The caller is expected to invoke rtas_ibm_get_vpd() multiple times + * to retrieve all the VPD for the provided location code. Only one + * sequence should be in progress at any time; starting a new sequence + * will disrupt any sequence already in progress. Serialization of VPD + * retrieval sequences is the responsibility of the caller. + * + * The caller should inspect @params.status to determine whether more + * calls are needed to complete the sequence. + * + * Context: May sleep. + * Return: -ve on error, 0 otherwise. 
+ */ +static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params) +{ + const struct papr_location_code *loc_code = params->loc_code; + struct rtas_work_area *work_area = params->work_area; + u32 rets[2]; + s32 fwrc; + int ret; + + lockdep_assert_held(&rtas_ibm_get_vpd_lock); + + do { + fwrc = rtas_call(rtas_function_token(RTAS_FN_IBM_GET_VPD), 4, 3, + rets, + __pa(loc_code), + rtas_work_area_phys(work_area), + rtas_work_area_size(work_area), + params->sequence); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case RTAS_HARDWARE_ERROR: + ret = -EIO; + break; + case RTAS_INVALID_PARAMETER: + ret = -EINVAL; + break; + case RTAS_IBM_GET_VPD_START_OVER: + ret = -EAGAIN; + break; + case RTAS_IBM_GET_VPD_MORE_DATA: + params->sequence = rets[0]; + fallthrough; + case RTAS_IBM_GET_VPD_COMPLETE: + params->written = rets[1]; + /* + * Kernel or firmware bug, do not continue. + */ + if (WARN(params->written > rtas_work_area_size(work_area), + "possible write beyond end of work area")) + ret = -EFAULT; + else + ret = 0; + break; + default: + ret = -EIO; + pr_err_ratelimited("unexpected ibm,get-vpd status %d\n", fwrc); + break; + } + + params->status = fwrc; + return ret; +} + +/* + * Internal VPD "blob" APIs for accumulating ibm,get-vpd results into + * an immutable buffer to be attached to a file descriptor. + */ +struct vpd_blob { + const char *data; + size_t len; +}; + +static bool vpd_blob_has_data(const struct vpd_blob *blob) +{ + return blob->data && blob->len; +} + +static void vpd_blob_free(const struct vpd_blob *blob) +{ + if (blob) { + kvfree(blob->data); + kfree(blob); + } +} + +/** + * vpd_blob_extend() - Append data to a &struct vpd_blob. + * @blob: The blob to extend. + * @data: The new data to append to @blob. + * @len: The length of @data. + * + * Context: May sleep. + * Return: -ENOMEM on allocation failure, 0 otherwise. + */ +static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len) +{ + const size_t new_len = blob->len + len; + const size_t old_len = blob->len; + const char *old_ptr = blob->data; + char *new_ptr; + + new_ptr = old_ptr ? + kvrealloc(old_ptr, old_len, new_len, GFP_KERNEL_ACCOUNT) : + kvmalloc(len, GFP_KERNEL_ACCOUNT); + + if (!new_ptr) + return -ENOMEM; + + memcpy(&new_ptr[old_len], data, len); + blob->data = new_ptr; + blob->len = new_len; + return 0; +} + +/** + * vpd_blob_generate() - Construct a new &struct vpd_blob. + * @generator: Function that supplies the blob data. + * @arg: Context pointer supplied by caller, passed to @generator. + * + * The @generator callback is invoked until it returns NULL. @arg is + * passed to @generator in its first argument on each call. When + * @generator returns data, it should store the data length in its + * second argument. + * + * Context: May sleep. + * Return: A completely populated &struct vpd_blob, or NULL on error. + */ +static const struct vpd_blob * +vpd_blob_generate(const char * (*generator)(void *, size_t *), void *arg) +{ + struct vpd_blob *blob; + const char *buf; + size_t len; + int err = 0; + + blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT); + if (!blob) + return NULL; + + while (err == 0 && (buf = generator(arg, &len))) + err = vpd_blob_extend(blob, buf, len); + + if (err != 0 || !vpd_blob_has_data(blob)) + goto free_blob; + + return blob; +free_blob: + vpd_blob_free(blob); + return NULL; +} + +/* + * Internal VPD sequence APIs. A VPD sequence is a series of calls to + * ibm,get-vpd for a given location code. 
The sequence ends when an + * error is encountered or all VPD for the location code has been + * returned. + */ + +/** + * struct vpd_sequence - State for managing a VPD sequence. + * @error: Shall be zero as long as the sequence has not encountered an error, + * -ve errno otherwise. Use vpd_sequence_set_err() to update this. + * @params: Parameter block to pass to rtas_ibm_get_vpd(). + */ +struct vpd_sequence { + int error; + struct rtas_ibm_get_vpd_params params; +}; + +/** + * vpd_sequence_begin() - Begin a VPD retrieval sequence. + * @seq: Uninitialized sequence state. + * @loc_code: Location code that defines the scope of the VPD to return. + * + * Initializes @seq with the resources necessary to carry out a VPD + * sequence. Callers must pass @seq to vpd_sequence_end() regardless + * of whether the sequence succeeds. + * + * Context: May sleep. + */ +static void vpd_sequence_begin(struct vpd_sequence *seq, + const struct papr_location_code *loc_code) +{ + /* + * Use a static data structure for the location code passed to + * RTAS to ensure it's in the RMA and avoid a separate work + * area allocation. Guarded by the function lock. + */ + static struct papr_location_code static_loc_code; + + /* + * We could allocate the work area before acquiring the + * function lock, but that would allow concurrent requests to + * exhaust the limited work area pool for no benefit. So + * allocate the work area under the lock. + */ + mutex_lock(&rtas_ibm_get_vpd_lock); + static_loc_code = *loc_code; + *seq = (struct vpd_sequence) { + .params = { + .work_area = rtas_work_area_alloc(SZ_4K), + .loc_code = &static_loc_code, + .sequence = 1, + }, + }; +} + +/** + * vpd_sequence_end() - Finalize a VPD retrieval sequence. + * @seq: Sequence state. + * + * Releases resources obtained by vpd_sequence_begin(). + */ +static void vpd_sequence_end(struct vpd_sequence *seq) +{ + rtas_work_area_free(seq->params.work_area); + mutex_unlock(&rtas_ibm_get_vpd_lock); +} + +/** + * vpd_sequence_should_stop() - Determine whether a VPD retrieval sequence + * should continue. + * @seq: VPD sequence state. + * + * Examines the sequence error state and outputs of the last call to + * ibm,get-vpd to determine whether the sequence in progress should + * continue or stop. + * + * Return: True if the sequence has encountered an error or if all VPD for + * this sequence has been retrieved. False otherwise. + */ +static bool vpd_sequence_should_stop(const struct vpd_sequence *seq) +{ + bool done; + + if (seq->error) + return true; + + switch (seq->params.status) { + case 0: + if (seq->params.written == 0) + done = false; /* Initial state. */ + else + done = true; /* All data consumed. */ + break; + case 1: + done = false; /* More data available. */ + break; + default: + done = true; /* Error encountered. */ + break; + } + + return done; +} + +static int vpd_sequence_set_err(struct vpd_sequence *seq, int err) +{ + /* Preserve the first error recorded. */ + if (seq->error == 0) + seq->error = err; + + return seq->error; +} + +/* + * Generator function to be passed to vpd_blob_generate(). + */ +static const char *vpd_sequence_fill_work_area(void *arg, size_t *len) +{ + struct vpd_sequence *seq = arg; + struct rtas_ibm_get_vpd_params *p = &seq->params; + + if (vpd_sequence_should_stop(seq)) + return NULL; + if (vpd_sequence_set_err(seq, rtas_ibm_get_vpd(p))) + return NULL; + *len = p->written; + return rtas_work_area_raw_buf(p->work_area); +} + +/* + * Higher-level VPD retrieval code below. 
These functions use the + * vpd_blob_* and vpd_sequence_* APIs defined above to create fd-based + * VPD handles for consumption by user space. + */ + +/** + * papr_vpd_run_sequence() - Run a single VPD retrieval sequence. + * @loc_code: Location code that defines the scope of VPD to return. + * + * Context: May sleep. Holds a mutex and an RTAS work area for its + * duration. Typically performs multiple sleepable slab + * allocations. + * + * Return: A populated &struct vpd_blob on success. Encoded error + * pointer otherwise. + */ +static const struct vpd_blob *papr_vpd_run_sequence(const struct papr_location_code *loc_code) +{ + const struct vpd_blob *blob; + struct vpd_sequence seq; + + vpd_sequence_begin(&seq, loc_code); + blob = vpd_blob_generate(vpd_sequence_fill_work_area, &seq); + if (!blob) + vpd_sequence_set_err(&seq, -ENOMEM); + vpd_sequence_end(&seq); + + if (seq.error) { + vpd_blob_free(blob); + return ERR_PTR(seq.error); + } + + return blob; +} + +/** + * papr_vpd_retrieve() - Return the VPD for a location code. + * @loc_code: Location code that defines the scope of VPD to return. + * + * Run VPD sequences against @loc_code until a blob is successfully + * instantiated, or a hard error is encountered, or a fatal signal is + * pending. + * + * Context: May sleep. + * Return: A fully populated VPD blob when successful. Encoded error + * pointer otherwise. + */ +static const struct vpd_blob *papr_vpd_retrieve(const struct papr_location_code *loc_code) +{ + const struct vpd_blob *blob; + + /* + * EAGAIN means the sequence errored with a -4 (VPD changed) + * status from ibm,get-vpd, and we should attempt a new + * sequence. PAPR+ v2.13 R1–7.3.20–5 indicates that this + * should be a transient condition, not something that happens + * continuously. But we'll stop trying on a fatal signal. + */ + do { + blob = papr_vpd_run_sequence(loc_code); + if (!IS_ERR(blob)) /* Success. */ + break; + if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */ + break; + pr_info_ratelimited("VPD changed during retrieval, retrying\n"); + cond_resched(); + } while (!fatal_signal_pending(current)); + + return blob; +} + +static ssize_t papr_vpd_handle_read(struct file *file, char __user *buf, size_t size, loff_t *off) +{ + const struct vpd_blob *blob = file->private_data; + + /* bug: we should not instantiate a handle without any data attached. */ + if (!vpd_blob_has_data(blob)) { + pr_err_once("handle without data\n"); + return -EIO; + } + + return simple_read_from_buffer(buf, size, off, blob->data, blob->len); +} + +static int papr_vpd_handle_release(struct inode *inode, struct file *file) +{ + const struct vpd_blob *blob = file->private_data; + + vpd_blob_free(blob); + + return 0; +} + +static loff_t papr_vpd_handle_seek(struct file *file, loff_t off, int whence) +{ + const struct vpd_blob *blob = file->private_data; + + return fixed_size_llseek(file, off, whence, blob->len); +} + + +static const struct file_operations papr_vpd_handle_ops = { + .read = papr_vpd_handle_read, + .llseek = papr_vpd_handle_seek, + .release = papr_vpd_handle_release, +}; + +/** + * papr_vpd_create_handle() - Create a fd-based handle for reading VPD. + * @ulc: Location code in user memory; defines the scope of the VPD to + * retrieve. + * + * Handler for PAPR_VPD_IOC_CREATE_HANDLE ioctl command. Validates + * @ulc and instantiates an immutable VPD "blob" for it. The blob is + * attached to a file descriptor for reading by user space. The memory + * backing the blob is freed when the file is released. 
+ * + * The entire requested VPD is retrieved by this call and all + * necessary RTAS interactions are performed before returning the fd + * to user space. This keeps the read handler simple and ensures that + * the kernel can prevent interleaving of ibm,get-vpd call sequences. + * + * Return: The installed fd number if successful, -ve errno otherwise. + */ +static long papr_vpd_create_handle(struct papr_location_code __user *ulc) +{ + struct papr_location_code klc; + const struct vpd_blob *blob; + struct file *file; + long err; + int fd; + + if (copy_from_user(&klc, ulc, sizeof(klc))) + return -EFAULT; + + if (!string_is_terminated(klc.str, ARRAY_SIZE(klc.str))) + return -EINVAL; + + blob = papr_vpd_retrieve(&klc); + if (IS_ERR(blob)) + return PTR_ERR(blob); + + fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = fd; + goto free_blob; + } + + file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops, + (void *)blob, O_RDONLY); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_fd; + } + + file->f_mode |= FMODE_LSEEK | FMODE_PREAD; + fd_install(fd, file); + return fd; +put_fd: + put_unused_fd(fd); +free_blob: + vpd_blob_free(blob); + return err; +} + +/* + * Top-level ioctl handler for /dev/papr-vpd. + */ +static long papr_vpd_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_VPD_IOC_CREATE_HANDLE: + ret = papr_vpd_create_handle(argp); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +static const struct file_operations papr_vpd_ops = { + .unlocked_ioctl = papr_vpd_dev_ioctl, +}; + +static struct miscdevice papr_vpd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-vpd", + .fops = &papr_vpd_ops, +}; + +static __init int papr_vpd_init(void) +{ + if (!rtas_function_implemented(RTAS_FN_IBM_GET_VPD)) + return -ENODEV; + + return misc_register(&papr_vpd_dev); +} +machine_device_initcall(pseries, papr_vpd_init); -- cgit v1.2.3 From 35aae182bd7b422be3cefc08c12207bf2b973364 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:57 -0600 Subject: powerpc/pseries/papr-sysparm: Validate buffer object lengths The ability to get and set system parameters will be exposed to user space, so let's get a little more strict about malformed papr_sysparm_buf objects. * Create accessors for the length field of struct papr_sysparm_buf. The length is always stored in MSB order and this is better than spreading the necessary conversions all over. * Reject attempts to submit invalid buffers to RTAS. * Warn if RTAS returns a buffer with an invalid length, clamping the returned length to a safe value that won't overrun the buffer. These are meant as precautionary measures to mitigate both firmware and kernel bugs in this area, should they arise, but I am not aware of any. 
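Concretely, the clamp is a round trip through the MSB-order length field, so the encoded length of valid data can never exceed the value storage (a sketch mirroring the helpers added below):

  /*
   * Invariant after clamping: readers honoring buf->len cannot be
   * induced to run past the end of buf->val[].
   */
  size_t len = min(sizeof(buf->val), (size_t)be16_to_cpu(buf->len));
  buf->len = cpu_to_be16(len);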
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-10-e9eafd0c8c6c@linux.ibm.com --- arch/powerpc/platforms/pseries/papr-sysparm.c | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c index fedc61599e6c..a1e7aeac7416 100644 --- a/arch/powerpc/platforms/pseries/papr-sysparm.c +++ b/arch/powerpc/platforms/pseries/papr-sysparm.c @@ -23,6 +23,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) kfree(buf); } +static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf) +{ + return be16_to_cpu(buf->len); +} + +static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t length) +{ + WARN_ONCE(length > sizeof(buf->val), + "bogus length %zu, clamping to safe value", length); + length = min(sizeof(buf->val), length); + buf->len = cpu_to_be16(length); +} + +/* + * For use on buffers returned from ibm,get-system-parameter before + * returning them to callers. Ensures the encoded length of valid data + * cannot overrun buf->val[]. + */ +static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf) +{ + papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf)); +} + +/* + * Perform some basic diligence on the system parameter buffer before + * submitting it to RTAS. + */ +static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf) +{ + /* + * Firmware ought to reject buffer lengths that exceed the + * maximum specified in PAPR, but there's no reason for the + * kernel to allow them either. + */ + if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val)) + return false; + + return true; +} + /** * papr_sysparm_get() - Retrieve the value of a PAPR system parameter. * @param: PAPR system parameter token as described in @@ -63,6 +103,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; + if (!papr_sysparm_buf_can_submit(buf)) + return -EINVAL; + work_area = rtas_work_area_alloc(sizeof(*buf)); memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); @@ -77,6 +120,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) case 0: ret = 0; memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf)); + papr_sysparm_buf_clamp_length(buf); break; case -3: /* parameter not implemented */ ret = -EOPNOTSUPP; @@ -115,6 +159,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; + if (!papr_sysparm_buf_can_submit(buf)) + return -EINVAL; + work_area = rtas_work_area_alloc(sizeof(*buf)); memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); -- cgit v1.2.3 From 905b9e48786ec55b2c469db77fb46e20bf3e4901 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:58 -0600 Subject: powerpc/pseries/papr-sysparm: Expose character device to user space Until now the papr_sysparm APIs have been kernel-internal. But user space needs access to PAPR system parameters too. The only method available to user space today to get or set system parameters is using sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user space and firmware. This is incompatible with lockdown and should be deprecated. So provide an alternative ABI to user space in the form of a /dev/papr-sysparm character device with just two ioctl commands (get and set). 
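For example, retrieving a parameter such as the LPAR name (token 55, per the PAPR_SYSPARM_LPAR_NAME definition) would look roughly like the following from user space. This is an illustrative sketch, with error handling omitted and assuming the returned value is printable text, as it is for the LPAR name:

  struct papr_sysparm_io_block iob = {
          .parameter = 55, /* PAPR_SYSPARM_LPAR_NAME */
  };
  int fd = open("/dev/papr-sysparm", O_RDWR);

  if (ioctl(fd, PAPR_SYSPARM_IOC_GET, &iob) == 0)
          printf("%.*s\n", (int)iob.length, iob.data);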
The data payloads involved are small enough to fit in the ioctl argument buffer, making the code relatively simple. Exposing the system parameters through sysfs has been considered but it would be too awkward: * The kernel currently does not have to contain an exhaustive list of defined system parameters. This is a convenient property to maintain because we don't have to update the kernel whenever a new parameter is added to PAPR. Exporting a named attribute in sysfs for each parameter would negate this. * Some system parameters are text-based and some are not. * Retrieval of at least one system parameter requires input data, which a simple read-oriented interface can't support. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-11-e9eafd0c8c6c@linux.ibm.com --- Documentation/userspace-api/ioctl/ioctl-number.rst | 2 + arch/powerpc/include/asm/papr-sysparm.h | 17 ++- arch/powerpc/include/uapi/asm/papr-sysparm.h | 58 ++++++++ arch/powerpc/platforms/pseries/papr-sysparm.c | 158 ++++++++++++++++++++- 4 files changed, 227 insertions(+), 8 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/papr-sysparm.h (limited to 'arch') diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index a950545bf7cd..d8b6cb1a3636 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -351,6 +351,8 @@ Code Seq# Include File Comments 0xB2 00 arch/powerpc/include/uapi/asm/papr-vpd.h powerpc/pseries VPD API +0xB2 01-02 arch/powerpc/include/uapi/asm/papr-sysparm.h powerpc/pseries system parameter API + 0xB3 00 linux/mmc/ioctl.h 0xB4 00-0F linux/gpio.h 0xB5 00-0F uapi/linux/rpmsg.h diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h index f5fdbd8ae9db..0dbbff59101d 100644 --- a/arch/powerpc/include/asm/papr-sysparm.h +++ b/arch/powerpc/include/asm/papr-sysparm.h @@ -2,8 +2,10 @@ #ifndef _ASM_POWERPC_PAPR_SYSPARM_H #define _ASM_POWERPC_PAPR_SYSPARM_H +#include + typedef struct { - const u32 token; + u32 token; } papr_sysparm_t; #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, }) @@ -20,11 +22,14 @@ typedef struct { #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS mk_papr_sysparm(50) #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55) -enum { - PAPR_SYSPARM_MAX_INPUT = 1024, - PAPR_SYSPARM_MAX_OUTPUT = 4000, -}; - +/** + * struct papr_sysparm_buf - RTAS work area layout for system parameter functions. + * + * This is the memory layout of the buffers passed to/from + * ibm,get-system-parameter and ibm,set-system-parameter. It is + * distinct from the papr_sysparm_io_block structure that is passed + * between user space and the kernel. 
+ */ struct papr_sysparm_buf { __be16 len; char val[PAPR_SYSPARM_MAX_OUTPUT]; diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h new file mode 100644 index 000000000000..9f9a0f267ea5 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_SYSPARM_H_ +#define _UAPI_PAPR_SYSPARM_H_ + +#include +#include +#include + +enum { + PAPR_SYSPARM_MAX_INPUT = 1024, + PAPR_SYSPARM_MAX_OUTPUT = 4000, +}; + +struct papr_sysparm_io_block { + __u32 parameter; + __u16 length; + char data[PAPR_SYSPARM_MAX_OUTPUT]; +}; + +/** + * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter. + * + * Uses _IOWR because of one corner case: Retrieving the value of the + * "OS Service Entitlement Status" parameter (60) requires the caller + * to supply input data (a date string) in the buffer passed to + * firmware. So the @length and @data of the incoming + * papr_sysparm_io_block are always used to initialize the work area + * supplied to ibm,get-system-parameter. No other parameters are known + * to parameterize the result this way, and callers are encouraged + * (but not required) to zero-initialize @length and @data in the + * common case. + * + * On error the contents of the ioblock are indeterminate. + * + * Return: + * 0: Success; @length is the length of valid data in @data, not to exceed @PAPR_SYSPARM_MAX_OUTPUT. + * -EIO: Platform error. (-1) + * -EINVAL: Incorrect data length or format. (-9999) + * -EPERM: The calling partition is not allowed to access this parameter. (-9002) + * -EOPNOTSUPP: Parameter not supported on this platform (-3) + */ +#define PAPR_SYSPARM_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 1, struct papr_sysparm_io_block) + +/** + * PAPR_SYSPARM_IOC_SET - Update the value of a PAPR system parameter. + * + * The contents of the ioblock are unchanged regardless of success. + * + * Return: + * 0: Success; the parameter has been updated. + * -EIO: Platform error. (-1) + * -EINVAL: Incorrect data length or format. (-9999) + * -EPERM: The calling partition is not allowed to access this parameter. (-9002) + * -EOPNOTSUPP: Parameter not supported on this platform (-3) + */ +#define PAPR_SYSPARM_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 2, struct papr_sysparm_io_block) + +#endif /* _UAPI_PAPR_SYSPARM_H_ */ diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c index a1e7aeac7416..7063ce8884e4 100644 --- a/arch/powerpc/platforms/pseries/papr-sysparm.c +++ b/arch/powerpc/platforms/pseries/papr-sysparm.c @@ -2,14 +2,20 @@ #define pr_fmt(fmt) "papr-sysparm: " fmt +#include #include +#include +#include #include #include +#include #include #include -#include +#include +#include #include #include +#include struct papr_sysparm_buf *papr_sysparm_buf_alloc(void) { @@ -87,7 +93,6 @@ static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf) * * Return: 0 on success, -errno otherwise. @buf is unmodified on error. 
*/ - int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) { const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER); @@ -196,3 +201,152 @@ int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) return ret; } + +static struct papr_sysparm_buf * +papr_sysparm_buf_from_user(const struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + long err; + u16 len; + + /* + * The length of valid data that userspace claims to be in + * user_iob->data[]. + */ + if (get_user(len, &user_iob->length)) + return ERR_PTR(-EFAULT); + + static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_INPUT); + static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_INPUT); + + if (len > PAPR_SYSPARM_MAX_INPUT) + return ERR_PTR(-EINVAL); + + kern_spbuf = papr_sysparm_buf_alloc(); + if (!kern_spbuf) + return ERR_PTR(-ENOMEM); + + papr_sysparm_buf_set_length(kern_spbuf, len); + + if (len > 0 && copy_from_user(kern_spbuf->val, user_iob->data, len)) { + err = -EFAULT; + goto free_sysparm_buf; + } + + return kern_spbuf; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ERR_PTR(err); +} + +static int papr_sysparm_buf_to_user(const struct papr_sysparm_buf *kern_spbuf, + struct papr_sysparm_io_block __user *user_iob) +{ + u16 len_out = papr_sysparm_buf_get_length(kern_spbuf); + + if (put_user(len_out, &user_iob->length)) + return -EFAULT; + + static_assert(sizeof(user_iob->data) >= PAPR_SYSPARM_MAX_OUTPUT); + static_assert(sizeof(kern_spbuf->val) >= PAPR_SYSPARM_MAX_OUTPUT); + + if (copy_to_user(user_iob->data, kern_spbuf->val, PAPR_SYSPARM_MAX_OUTPUT)) + return -EFAULT; + + return 0; +} + +static long papr_sysparm_ioctl_get(struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + papr_sysparm_t param; + long ret; + + if (get_user(param.token, &user_iob->parameter)) + return -EFAULT; + + kern_spbuf = papr_sysparm_buf_from_user(user_iob); + if (IS_ERR(kern_spbuf)) + return PTR_ERR(kern_spbuf); + + ret = papr_sysparm_get(param, kern_spbuf); + if (ret) + goto free_sysparm_buf; + + ret = papr_sysparm_buf_to_user(kern_spbuf, user_iob); + if (ret) + goto free_sysparm_buf; + + ret = 0; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ret; +} + + +static long papr_sysparm_ioctl_set(struct papr_sysparm_io_block __user *user_iob) +{ + struct papr_sysparm_buf *kern_spbuf; + papr_sysparm_t param; + long ret; + + if (get_user(param.token, &user_iob->parameter)) + return -EFAULT; + + kern_spbuf = papr_sysparm_buf_from_user(user_iob); + if (IS_ERR(kern_spbuf)) + return PTR_ERR(kern_spbuf); + + ret = papr_sysparm_set(param, kern_spbuf); + if (ret) + goto free_sysparm_buf; + + ret = 0; + +free_sysparm_buf: + papr_sysparm_buf_free(kern_spbuf); + return ret; +} + +static long papr_sysparm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_SYSPARM_IOC_GET: + ret = papr_sysparm_ioctl_get(argp); + break; + case PAPR_SYSPARM_IOC_SET: + if (filp->f_mode & FMODE_WRITE) + ret = papr_sysparm_ioctl_set(argp); + else + ret = -EBADF; + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +static const struct file_operations papr_sysparm_ops = { + .unlocked_ioctl = papr_sysparm_ioctl, +}; + +static struct miscdevice papr_sysparm_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-sysparm", + .fops = &papr_sysparm_ops, +}; + +static __init int papr_sysparm_init(void) 
+{ + if (!rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER)) + return -ENODEV; + + return misc_register(&papr_sysparm_dev); +} +machine_device_initcall(pseries, papr_sysparm_init); -- cgit v1.2.3 From 2ec36570c3581285d15de672eaed10ce7e9218cd Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Fri, 8 Dec 2023 22:00:40 +0530 Subject: powerpc/ftrace: Fix indentation in ftrace.h Replace seven spaces with a tab character to fix an indentation issue reported by the kernel test robot. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311221731.alUwTDIm-lkp@intel.com/ Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/9f058227bd9243f0842786ef7228d87ab10d29f6.1702045299.git.naveen@kernel.org --- arch/powerpc/include/asm/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 9e5a39b6a311..1ebd2ca97f12 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -25,7 +25,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) addr += MCOUNT_INSN_SIZE; - return addr; + return addr; } unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, -- cgit v1.2.3 From ae24db43b3b427eb290b58d55179c32f0a7539d1 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Fri, 8 Dec 2023 22:00:42 +0530 Subject: powerpc/ftrace: Remove nops after the call to ftrace_stub ftrace_stub is within the same compilation unit, so the call never crosses a module boundary and the linker never needs to patch a TOC restore into the slot after the branch; the subsequent nop instruction is not needed. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/8ee5ec520e37d5523654bb2cd65a17512fb774e2.1702045299.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace_entry.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 90701885762c..a517a4085cff 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -162,7 +162,6 @@ _GLOBAL(ftrace_regs_caller) .globl ftrace_regs_call ftrace_regs_call: bl ftrace_stub - nop ftrace_regs_exit 1 _GLOBAL(ftrace_caller) @@ -171,7 +170,6 @@ _GLOBAL(ftrace_caller) .globl ftrace_call ftrace_call: bl ftrace_stub - nop ftrace_regs_exit 0 _GLOBAL(ftrace_stub) -- cgit v1.2.3 From b20f98e8b3deb50247603f0242ee2d1e38726635 Mon Sep 17 00:00:00 2001 From: Sathvika Vasireddy Date: Fri, 8 Dec 2023 22:00:43 +0530 Subject: powerpc/Kconfig: Select FUNCTION_ALIGNMENT_4B Commit d49a0626216b95 ("arch: Introduce CONFIG_FUNCTION_ALIGNMENT") introduced a generic function-alignment infrastructure. Move to using FUNCTION_ALIGNMENT_4B on powerpc, to use the same alignment as that of the existing _GLOBAL macro.
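For context on the alignment values: on powerpc the assembler's .align directive takes a power of two, so the removed ".align 2" already meant 4-byte alignment, which is exactly what FUNCTION_ALIGNMENT_4B selects. A minimal sketch of the generic definitions this relies on, paraphrased from memory of include/linux/linkage.h and not part of this patch:

	/* Generic fallback (sketch); CONFIG_FUNCTION_ALIGNMENT is 4 when
	 * FUNCTION_ALIGNMENT_4B=y, so __ALIGN becomes ".balign 4". */
	#ifndef __ALIGN
	#define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT
	#define __ALIGN_STR	__stringify(__ALIGN)
	#endif

With the powerpc-specific __ALIGN/__ALIGN_STR removed below, that generic fallback takes effect and produces the same 4-byte alignment.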
Signed-off-by: Sathvika Vasireddy Signed-off-by: Michael Ellerman Link: https://msgid.link/21892186ec44abe24df0daf64f577dac0e78783f.1702045299.git.naveen@kernel.org --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/linkage.h | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6f105ee4f3cf..318e5c1b7454 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -189,6 +189,7 @@ config PPC select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY + select FUNCTION_ALIGNMENT_4B select GENERIC_ATOMIC64 if PPC32 select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index b88d1d2cf304..b71b9582e754 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -4,9 +4,6 @@ #include -#define __ALIGN .align 2 -#define __ALIGN_STR ".align 2" - #ifdef CONFIG_PPC64_ELF_ABI_V1 #define cond_syscall(x) \ asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ -- cgit v1.2.3 From f46c8a75263f97bda13c739ba1c90aced0d3b071 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 4 Dec 2023 10:32:23 +0800 Subject: powerpc/mm: Fix null-pointer dereference in pgtable_cache_add kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Suggested-by: Christophe Leroy Suggested-by: Michael Ellerman Signed-off-by: Kunwu Chan Signed-off-by: Michael Ellerman Link: https://msgid.link/20231204023223.2447523-1-chentao@kylinos.cn --- arch/powerpc/mm/init-common.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 119ef491f797..d3a7726ecf51 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -126,7 +126,7 @@ void pgtable_cache_add(unsigned int shift) * as to leave enough 0 bits in the address to contain it. */ unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; + struct kmem_cache *new = NULL; /* It would be nice if this was a BUILD_BUG_ON(), but at the * moment, gcc doesn't seem to recognize is_power_of_2 as a @@ -139,7 +139,8 @@ void pgtable_cache_add(unsigned int shift) align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); + if (name) + new = kmem_cache_create(name, table_size, align, 0, ctor(shift)); if (!new) panic("Could not allocate pgtable cache for order %d", shift); -- cgit v1.2.3 From 9a260f2dd827bbc82cc60eb4f4d8c22707d80742 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 8 Dec 2023 16:59:37 +0800 Subject: powerpc/powernv: Add a null pointer check to scom_debug_init_one() kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Add a null pointer check, and release 'ent' to avoid memory leaks. 
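The same kasprintf() failure mode recurs in the next several fixes. As a rough sketch of the recurring shape of the fix (illustrative names, not taken from any one of these patches): kasprintf() allocates its result with kmalloc() and returns NULL on failure, so each caller has to check the result and unwind anything allocated earlier.

	/* Sketch of the pattern; struct example_ent is hypothetical. */
	static int example_init_one(struct device_node *dn)
	{
		struct example_ent *ent;

		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
		if (!ent)
			return -ENOMEM;

		ent->path = kasprintf(GFP_KERNEL, "%pOF", dn);
		if (!ent->path) {	/* the check these patches add */
			kfree(ent);	/* release earlier allocations too */
			return -ENOMEM;
		}

		return 0;
	}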
Fixes: bfd2f0d49aef ("powerpc/powernv: Get rid of old scom_controller abstraction") Signed-off-by: Kunwu Chan Signed-off-by: Michael Ellerman Link: https://msgid.link/20231208085937.107210-1-chentao@kylinos.cn --- arch/powerpc/platforms/powernv/opal-xscom.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 262cd6fac907..748c2b97fa53 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -165,6 +165,11 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn, ent->chip = chip; snprintf(ent->name, 16, "%08x", chip); ent->path.data = (void *)kasprintf(GFP_KERNEL, "%pOF", dn); + if (!ent->path.data) { + kfree(ent); + return -ENOMEM; + } + ent->path.size = strlen((char *)ent->path.data); dir = debugfs_create_dir(ent->name, root); -- cgit v1.2.3 From 8649829a1dd25199bbf557b2621cedb4bf9b3050 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 27 Nov 2023 11:07:55 +0800 Subject: powerpc/powernv: Add a null pointer check in opal_event_init() kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Fixes: 2717a33d6074 ("powerpc/opal-irqchip: Use interrupt names if present") Signed-off-by: Kunwu Chan Signed-off-by: Michael Ellerman Link: https://msgid.link/20231127030755.1546750-1-chentao@kylinos.cn --- arch/powerpc/platforms/powernv/opal-irqchip.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index f9a7001dacb7..56a1f7ce78d2 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -275,6 +275,8 @@ int __init opal_event_init(void) else name = kasprintf(GFP_KERNEL, "opal"); + if (!name) + continue; /* Install interrupt handler */ rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK, name, NULL); -- cgit v1.2.3 From e123015c0ba859cf48aa7f89c5016cc6e98e018d Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Sun, 26 Nov 2023 17:57:39 +0800 Subject: powerpc/powernv: Add a null pointer check in opal_powercap_init() kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. 
Fixes: b9ef7b4b867f ("powerpc: Convert to using %pOFn instead of device_node.name") Signed-off-by: Kunwu Chan Signed-off-by: Michael Ellerman Link: https://msgid.link/20231126095739.1501990-1-chentao@kylinos.cn --- arch/powerpc/platforms/powernv/opal-powercap.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c index 7bfe4cbeb35a..ea917266aa17 100644 --- a/arch/powerpc/platforms/powernv/opal-powercap.c +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -196,6 +196,12 @@ void __init opal_powercap_init(void) j = 0; pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node); + if (!pcaps[i].pg.name) { + kfree(pcaps[i].pattrs); + kfree(pcaps[i].pg.attrs); + goto out_pcaps_pattrs; + } + if (has_min) { powercap_add_attr(min, "powercap-min", &pcaps[i].pattrs[j]); -- cgit v1.2.3 From 0a233867a39078ebb0f575e2948593bbff5826b3 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Sun, 26 Nov 2023 17:37:19 +0800 Subject: powerpc/imc-pmu: Add a null pointer check in update_events_in_group() kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Fixes: 885dcd709ba9 ("powerpc/perf: Add nest IMC PMU support") Signed-off-by: Kunwu Chan Signed-off-by: Michael Ellerman Link: https://msgid.link/20231126093719.1440305-1-chentao@kylinos.cn --- arch/powerpc/perf/imc-pmu.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 5d12ca386c1f..8664a7d297ad 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -299,6 +299,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) attr_group->attrs = attrs; do { ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value); + if (!ev_val_str) + continue; dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str); if (!dev_str) continue; @@ -306,6 +308,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) attrs[j++] = dev_str; if (pmu->events[i].scale) { ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name); + if (!ev_scale_str) + continue; dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale); if (!dev_str) continue; @@ -315,6 +319,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) if (pmu->events[i].unit) { ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name); + if (!ev_unit_str) + continue; dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit); if (!dev_str) continue; -- cgit v1.2.3 From a143892cb77c5397fd4356bbef9982abe4f3c5a5 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Wed, 20 Sep 2023 16:27:06 +0530 Subject: powerpc: add cpu_spec.cpu_features to vmcoreinfo CPU features can be determined in makedumpfile, using 'cur_cpu_spec.cpu_features'. This provides more data to makedumpfile about the crashed system, and can help in filtering the vmcore accordingly. 
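As a rough sketch of the consumer side (the real logic lives in makedumpfile; vmcoreinfo_value() and read_ulong() are assumed helpers for parsing the exported note and reading dump memory): VMCOREINFO_SYMBOL(cur_cpu_spec) exports the address of the cur_cpu_spec pointer as "SYMBOL(cur_cpu_spec)=...", and VMCOREINFO_OFFSET(cpu_spec, cpu_features) exports offsetof(struct cpu_spec, cpu_features) as "OFFSET(cpu_spec.cpu_features)=...", so a tool can locate the feature word roughly like this:

	/* Hypothetical dump-tool side; helper names are assumptions. */
	unsigned long sym   = vmcoreinfo_value("SYMBOL(cur_cpu_spec)");
	unsigned long off   = vmcoreinfo_value("OFFSET(cpu_spec.cpu_features)");
	unsigned long spec  = read_ulong(sym);		/* dereference the pointer */
	unsigned long feats = read_ulong(spec + off);	/* the cpu_features word */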
Signed-off-by: Aditya Gupta Signed-off-by: Michael Ellerman Link: https://msgid.link/20230920105706.853626-2-adityag@linux.ibm.com --- arch/powerpc/kexec/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index 85846cadb9b5..27fa9098a5b7 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -75,6 +75,7 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_OFFSET(mmu_psize_def, shift); #endif VMCOREINFO_SYMBOL(cur_cpu_spec); + VMCOREINFO_OFFSET(cpu_spec, cpu_features); VMCOREINFO_OFFSET(cpu_spec, mmu_features); vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled()); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); -- cgit v1.2.3 From 6f4b7052daa060e7d20d6d599697b8ac702a7e69 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 14 Nov 2023 12:42:19 +0530 Subject: powerpc/sched: Cleanup vcpu_is_preempted() No functional change in this patch. A helper is added to find out whether a vcpu is dispatched by the hypervisor. Use it instead of open-coding the check. Also clarify some of the comments. Signed-off-by: "Aneesh Kumar K.V" Signed-off-by: Michael Ellerman Link: https://msgid.link/20231114071219.198222-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/paravirt.h | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index ac4279208d63..b78b82d66057 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -76,6 +76,17 @@ static inline bool is_vcpu_idle(int vcpu) { return lppaca_of(vcpu).idle; } + +static inline bool vcpu_is_dispatched(int vcpu) +{ + /* + * This is the yield_count. An "odd" value (low bit on) means that + * the processor is yielded (either because of an OS yield or a + * hypervisor preempt). An even value implies that the processor is + * currently executing. + */ + return (!(yield_count_of(vcpu) & 1)); +} #else static inline bool is_shared_processor(void) { @@ -109,6 +120,10 @@ static inline bool is_vcpu_idle(int vcpu) { return false; } +static inline bool vcpu_is_dispatched(int vcpu) +{ + return true; +} #endif #define vcpu_is_preempted vcpu_is_preempted @@ -134,12 +149,12 @@ static inline bool vcpu_is_preempted(int cpu) * If the hypervisor has dispatched the target CPU on a physical * processor, then the target CPU is definitely not preempted. */ - if (!(yield_count_of(cpu) & 1)) + if (vcpu_is_dispatched(cpu)) return false; /* - * If the target CPU has yielded to Hypervisor but OS has not - * requested idle then the target CPU is definitely preempted. + * if the target CPU is not dispatched and the guest OS + * has not marked the CPU idle, then it is hypervisor preempted. */ if (!is_vcpu_idle(cpu)) return true; @@ -166,7 +181,7 @@ static inline bool vcpu_is_preempted(int cpu) /* * The PowerVM hypervisor dispatches VMs on a whole core - * basis. So we know that a thread sibling of the local CPU + * basis. So we know that a thread sibling of the executing CPU * cannot have been preempted by the hypervisor, even if it * has called H_CONFER, which will set the yield bit. */ @@ -174,15 +189,17 @@ static inline bool vcpu_is_preempted(int cpu) return false; /* - * If any of the threads of the target CPU's core are not - * preempted or ceded, then consider target CPU to be - * non-preempted.
+ * The specific target CPU was marked by guest OS as idle, but + * then also check all other cpus in the core for PowerVM + * because it does core scheduling and one of the vcpu + * of the core getting preempted by hypervisor implies + * other vcpus can also be considered preempted. */ first_cpu = cpu_first_thread_sibling(cpu); for (i = first_cpu; i < first_cpu + threads_per_core; i++) { if (i == cpu) continue; - if (!(yield_count_of(i) & 1)) + if (vcpu_is_dispatched(i)) return false; if (!is_vcpu_idle(i)) return true; -- cgit v1.2.3 From aa80c6343fcf53cbc29f84ba9f89ca87d4e41350 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 14 Dec 2023 23:37:11 +0530 Subject: powerpc/smp: Enable Asym packing for cores on shared processor If there are shared processor LPARs, the underlying hypervisor can have more virtual cores to handle than actual physical cores. Starting with Power 9, a big core (aka SMT8 core) has 2 nearly independent thread groups. On shared processor LPARs, it helps to pack threads onto a smaller number of cores so that overall system performance and utilization improve. PowerVM schedules at a big core level, hence packing to fewer cores helps. Since each thread-group is independent, running threads on both thread-groups of an SMT8 core should have a minimal adverse impact in non over-provisioned scenarios. These changes have no effect in the over-provisioned scenario: if there are more threads than SMT domains, asym_packing will not kick in. For example, say there are two 8-core shared LPARs that are actually sharing an 8-core shared physical pool, each running 8 threads. Consolidating the 8 threads onto 4 cores on each LPAR would help them perform better, because each LPAR then gets 100% of the time to run its applications and no switching is required by the hypervisor. To achieve this, enable the SD_ASYM_PACKING flag at the CACHE, MC and DIE levels when the system is running in shared processor mode and has big cores. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20231214180720.310852-2-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ab691c89d787..3fc8ad9646a4 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1003,6 +1003,13 @@ static int powerpc_smt_flags(void) } #endif +/* + * On shared processor LPARs scheduled on a big core (which has two or more + * independent thread groups per core), prefer lower numbered CPUs, so + * that workload consolidates to lesser number of cores. + */ +static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack); + /* * P9 has a slightly odd architecture where pairs of cores share an L2 cache. * This topology makes it *much* cheaper to migrate tasks between adjacent cores @@ -1011,9 +1018,20 @@ static int powerpc_smt_flags(void) */ static int powerpc_shared_cache_flags(void) { + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING; + return SD_SHARE_PKG_RESOURCES; } +static int powerpc_shared_proc_flags(void) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_ASYM_PACKING; + + return 0; +} + /* * We can't just pass cpu_l2_cache_mask() directly because * returns a non-const pointer and the compiler barfs on that.
@@ -1050,8 +1068,8 @@ static struct sched_domain_topology_level powerpc_topology[] = { { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, #endif { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_mc_mask, SD_INIT_NAME(MC) }, - { cpu_cpu_mask, SD_INIT_NAME(PKG) }, + { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) }, + { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG) }, { NULL, }, }; @@ -1686,6 +1704,9 @@ static void __init fixup_topology(void) { int i; + if (is_shared_processor() && has_big_cores) + static_branch_enable(&splpar_asym_pack); + #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); -- cgit v1.2.3 From 0e1c1986e0e65746daa05405d7747ce882f83cf1 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 14 Dec 2023 23:37:12 +0530 Subject: powerpc/smp: Disable MC domain for shared processor Like L2-cache info, coregroup information, which is used to determine MC sched domains, is only present on dedicated LPARs; i.e., PowerVM doesn't export coregroup information for shared processor LPARs. Hence disable creating MC domains on shared LPAR systems. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20231214180720.310852-3-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3fc8ad9646a4..2cebc53e97f9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1055,6 +1055,10 @@ static struct cpumask *cpu_coregroup_mask(int cpu) static bool has_coregroup_support(void) { + /* Coregroup identification not available on shared systems */ + if (is_shared_processor()) + return 0; + return coregroup_enabled; } -- cgit v1.2.3 From fd535a858ebeb1f478b1d065b6c057f52aad483a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 14 Dec 2023 23:37:13 +0530 Subject: powerpc/smp: Add __ro_after_init attribute There are some variables that are only updated at boot time.
So add the __ro_after_init attribute to such variables. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20231214180720.310852-4-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2cebc53e97f9..aea149627209 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -77,10 +77,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; #endif struct task_struct *secondary_current; -bool has_big_cores; -bool coregroup_enabled; -bool thread_group_shares_l2; -bool thread_group_shares_l3; +bool has_big_cores __ro_after_init; +bool coregroup_enabled __ro_after_init; +bool thread_group_shares_l2 __ro_after_init; +bool thread_group_shares_l3 __ro_after_init; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -987,7 +987,7 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return 0; } -static bool shared_caches; +static bool shared_caches __ro_after_init; #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymmetric SMT dependency */ -- cgit v1.2.3 From 0e93f1c780e8fd315f1262467b7d35eb6f766d2f Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 14 Dec 2023 23:37:14 +0530 Subject: powerpc/smp: Avoid asym packing within thread_group of a core The PowerVM hypervisor schedules at a core granularity. However, each core can have more than one thread_group. For better utilization in the case of a shared processor, it's preferable for the scheduler to pack to the lowest core. However, there is no benefit in moving a thread between two thread groups of the same core. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20231214180720.310852-5-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index aea149627209..9d8bb9a084bd 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1763,6 +1763,19 @@ void __init smp_cpus_done(unsigned int max_cpus) set_sched_topology(powerpc_topology); } +/* + * For asym packing, by default lower numbered CPU has higher priority. + * On shared processors, pack to lower numbered core. However avoid moving + * between thread_groups within the same core. + */ +int arch_asym_cpu_priority(int cpu) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return -cpu / threads_per_core; + + return -cpu; +} + #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { -- cgit v1.2.3 From c46975715f5a7b941aa09bc0539a8dbe297f308f Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 14 Dec 2023 23:37:15 +0530 Subject: powerpc/smp: Dynamically build Powerpc topology Currently there are four Powerpc-specific sched topologies. These are all statically defined. However, not all of these topologies are used by all Powerpc systems. To avoid unnecessary degenerations by the scheduler, masks and flags are compared. However, if the sched topologies are built dynamically, the code is simpler and there are greater chances of avoiding degenerations. Note: Even X86 builds its sched topologies dynamically, and the proposed changes are very similar to the way X86 builds its topologies.
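A condensed sketch of the dynamic-build pattern the diff below adopts (generic mask and flag names stand in for the powerpc-specific ones): only the levels that exist on the running system are appended, and the zero-initialized tail of the array serves as the NULL terminator the scheduler expects.

	/* Sketch only; has_smt, smt_mask, etc. are illustrative stand-ins. */
	static struct sched_domain_topology_level tl[6];	/* zeroed tail = terminator */
	int i = 0;

	if (has_smt)
		tl[i++] = (struct sched_domain_topology_level){
			smt_mask, smt_flags, SD_INIT_NAME(SMT) };
	if (has_shared_cache)
		tl[i++] = (struct sched_domain_topology_level){
			cache_mask, cache_flags, SD_INIT_NAME(CACHE) };
	tl[i++] = (struct sched_domain_topology_level){
		pkg_mask, pkg_flags, SD_INIT_NAME(PKG) };	/* always present */

	BUG_ON(i >= ARRAY_SIZE(tl) - 1);	/* keep at least one NULL entry */
	set_sched_topology(tl);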
Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20231214180720.310852-6-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 78 +++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9d8bb9a084bd..693334c20d07 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -93,15 +93,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); -enum { -#ifdef CONFIG_SCHED_SMT - smt_idx, -#endif - cache_idx, - mc_idx, - die_idx, -}; - #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 #define THREAD_GROUP_SHARE_L2_L3 2 @@ -1067,16 +1058,6 @@ static const struct cpumask *cpu_mc_mask(int cpu) return cpu_coregroup_mask(cpu); } -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) }, - { cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG) }, - { NULL, }, -}; - static int __init init_big_cores(void) { int cpu; @@ -1704,9 +1685,11 @@ void start_secondary(void *unused) BUG(); } -static void __init fixup_topology(void) +static struct sched_domain_topology_level powerpc_topology[6]; + +static void __init build_sched_topology(void) { - int i; + int i = 0; if (is_shared_processor() && has_big_cores) static_branch_enable(&splpar_asym_pack); @@ -1714,36 +1697,33 @@ static void __init fixup_topology(void) #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[smt_idx].mask = smallcore_smt_mask; + powerpc_topology[i++] = (struct sched_domain_topology_level){ + smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) + }; + } else { + powerpc_topology[i++] = (struct sched_domain_topology_level){ + cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) + }; } #endif + if (shared_caches) { + powerpc_topology[i++] = (struct sched_domain_topology_level){ + shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) + }; + } + if (has_coregroup_support()) { + powerpc_topology[i++] = (struct sched_domain_topology_level){ + cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) + }; + } + powerpc_topology[i++] = (struct sched_domain_topology_level){ + cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG) + }; - if (!has_coregroup_support()) - powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; - - /* - * Try to consolidate topology levels here instead of - * allowing scheduler to degenerate. - * - Dont consolidate if masks are different. - * - Dont consolidate if sd_flags exists and are different. - */ - for (i = 1; i <= die_idx; i++) { - if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask) - continue; - - if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags && - powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags) - continue; - - if (!powerpc_topology[i - 1].sd_flags) - powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags; + /* There must be one trailing NULL entry left. 
*/ + BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1); - powerpc_topology[i].mask = powerpc_topology[i + 1].mask; - powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags; -#ifdef CONFIG_SCHED_DEBUG - powerpc_topology[i].name = powerpc_topology[i + 1].name; -#endif - } + set_sched_topology(powerpc_topology); } void __init smp_cpus_done(unsigned int max_cpus) @@ -1758,9 +1738,7 @@ void __init smp_cpus_done(unsigned int max_cpus) smp_ops->bringup_done(); dump_numa_cpu_topology(); - - fixup_topology(); - set_sched_topology(powerpc_topology); + build_sched_topology(); } /* -- cgit v1.2.3 From 9ec1d7486e2520b4898d7f8e1ec3acc7c13c8dc8 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Tue, 12 Dec 2023 19:44:58 +0100 Subject: powerpc/fsl: Fix fsl,tmu-calibration to match the schema fsl,tmu-calibration is defined as a u32 matrix in Documentation/devicetree/bindings/thermal/qoriq-thermal.yaml. Use matching property syntax. No functional changes. Signed-off-by: David Heidelberg Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212184515.82886-2-david@ixit.cz --- arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 79 +++++++++++++++-------------- arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 71 +++++++++++++------------- 2 files changed, 76 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index d552044c5afc..aa5152ca8120 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -367,45 +367,46 @@ reg = <0xf0000 0x1000>; interrupts = <18 2 0 0>; fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>; - fsl,tmu-calibration = <0x00000000 0x0000000f - 0x00000001 0x00000017 - 0x00000002 0x0000001e - 0x00000003 0x00000026 - 0x00000004 0x0000002e - 0x00000005 0x00000035 - 0x00000006 0x0000003d - 0x00000007 0x00000044 - 0x00000008 0x0000004c - 0x00000009 0x00000053 - 0x0000000a 0x0000005b - 0x0000000b 0x00000064 - - 0x00010000 0x00000011 - 0x00010001 0x0000001c - 0x00010002 0x00000024 - 0x00010003 0x0000002b - 0x00010004 0x00000034 - 0x00010005 0x00000039 - 0x00010006 0x00000042 - 0x00010007 0x0000004c - 0x00010008 0x00000051 - 0x00010009 0x0000005a - 0x0001000a 0x00000063 - - 0x00020000 0x00000013 - 0x00020001 0x00000019 - 0x00020002 0x00000024 - 0x00020003 0x0000002c - 0x00020004 0x00000035 - 0x00020005 0x0000003d - 0x00020006 0x00000046 - 0x00020007 0x00000050 - 0x00020008 0x00000059 - - 0x00030000 0x00000002 - 0x00030001 0x0000000d - 0x00030002 0x00000019 - 0x00030003 0x00000024>; + fsl,tmu-calibration = + <0x00000000 0x0000000f>, + <0x00000001 0x00000017>, + <0x00000002 0x0000001e>, + <0x00000003 0x00000026>, + <0x00000004 0x0000002e>, + <0x00000005 0x00000035>, + <0x00000006 0x0000003d>, + <0x00000007 0x00000044>, + <0x00000008 0x0000004c>, + <0x00000009 0x00000053>, + <0x0000000a 0x0000005b>, + <0x0000000b 0x00000064>, + + <0x00010000 0x00000011>, + <0x00010001 0x0000001c>, + <0x00010002 0x00000024>, + <0x00010003 0x0000002b>, + <0x00010004 0x00000034>, + <0x00010005 0x00000039>, + <0x00010006 0x00000042>, + <0x00010007 0x0000004c>, + <0x00010008 0x00000051>, + <0x00010009 0x0000005a>, + <0x0001000a 0x00000063>, + + <0x00020000 0x00000013>, + <0x00020001 0x00000019>, + <0x00020002 0x00000024>, + <0x00020003 0x0000002c>, + <0x00020004 0x00000035>, + <0x00020005 0x0000003d>, + <0x00020006 0x00000046>, + <0x00020007 0x00000050>, + <0x00020008 0x00000059>, + + <0x00030000 0x00000002>, + <0x00030001 0x0000000d>, + <0x00030002 0x00000019>, + 
<0x00030003 0x00000024>; #thermal-sensor-cells = <1>; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index ad0ab33336b8..776788623204 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -447,41 +447,42 @@ reg = <0xf0000 0x1000>; interrupts = <18 2 0 0>; fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>; - fsl,tmu-calibration = <0x00000000 0x00000025 - 0x00000001 0x00000028 - 0x00000002 0x0000002d - 0x00000003 0x00000031 - 0x00000004 0x00000036 - 0x00000005 0x0000003a - 0x00000006 0x00000040 - 0x00000007 0x00000044 - 0x00000008 0x0000004a - 0x00000009 0x0000004f - 0x0000000a 0x00000054 - - 0x00010000 0x0000000d - 0x00010001 0x00000013 - 0x00010002 0x00000019 - 0x00010003 0x0000001f - 0x00010004 0x00000025 - 0x00010005 0x0000002d - 0x00010006 0x00000033 - 0x00010007 0x00000043 - 0x00010008 0x0000004b - 0x00010009 0x00000053 - - 0x00020000 0x00000010 - 0x00020001 0x00000017 - 0x00020002 0x0000001f - 0x00020003 0x00000029 - 0x00020004 0x00000031 - 0x00020005 0x0000003c - 0x00020006 0x00000042 - 0x00020007 0x0000004d - 0x00020008 0x00000056 - - 0x00030000 0x00000012 - 0x00030001 0x0000001d>; + fsl,tmu-calibration = + <0x00000000 0x00000025>, + <0x00000001 0x00000028>, + <0x00000002 0x0000002d>, + <0x00000003 0x00000031>, + <0x00000004 0x00000036>, + <0x00000005 0x0000003a>, + <0x00000006 0x00000040>, + <0x00000007 0x00000044>, + <0x00000008 0x0000004a>, + <0x00000009 0x0000004f>, + <0x0000000a 0x00000054>, + + <0x00010000 0x0000000d>, + <0x00010001 0x00000013>, + <0x00010002 0x00000019>, + <0x00010003 0x0000001f>, + <0x00010004 0x00000025>, + <0x00010005 0x0000002d>, + <0x00010006 0x00000033>, + <0x00010007 0x00000043>, + <0x00010008 0x0000004b>, + <0x00010009 0x00000053>, + + <0x00020000 0x00000010>, + <0x00020001 0x00000017>, + <0x00020002 0x0000001f>, + <0x00020003 0x00000029>, + <0x00020004 0x00000031>, + <0x00020005 0x0000003c>, + <0x00020006 0x00000042>, + <0x00020007 0x0000004d>, + <0x00020008 0x00000056>, + + <0x00030000 0x00000012>, + <0x00030001 0x0000001d>; #thermal-sensor-cells = <1>; }; -- cgit v1.2.3 From 6addc560e69cd1b2e68ef43ad62a878ac1956f51 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 21 Dec 2023 12:45:08 +0800 Subject: powerpc/mpc83xx: Add the missing set_freezable() for agent_thread_fn() The kernel thread function agent_thread_fn() invokes try_to_freeze() in its loop. But all kernel threads are non-freezable by default, so if we want to make a kernel thread freezable, we have to invoke set_freezable() explicitly.
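The canonical shape of a freezable kernel thread, as a brief sketch (my_wq and my_cond are illustrative; kthreads run non-freezable unless they opt in, so set_freezable() must come before the loop for the freezer calls to take effect):

	#include <linux/freezer.h>
	#include <linux/kthread.h>
	#include <linux/wait.h>

	static DECLARE_WAIT_QUEUE_HEAD(my_wq);	/* assumed event source */
	static bool my_cond;

	static int my_thread_fn(void *data)
	{
		set_freezable();	/* opt in: kthreads are non-freezable by default */

		while (!kthread_should_stop()) {
			/* sleeps until my_cond, and enters the freezer when asked */
			wait_event_freezable(my_wq, my_cond);
			/* ... handle the event ... */
		}
		return 0;
	}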
Signed-off-by: Kevin Hao Signed-off-by: Michael Ellerman Link: https://msgid.link/20231221044510.1802429-2-haokexin@gmail.com --- arch/powerpc/platforms/83xx/suspend.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 9833c36bda83..eed325ed08cc 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -261,6 +261,8 @@ static int mpc83xx_suspend_begin(suspend_state_t state) static int agent_thread_fn(void *data) { + set_freezable(); + while (1) { wait_event_interruptible(agent_wq, pci_pm_state >= 2); try_to_freeze(); -- cgit v1.2.3 From 11611d254c15cce1f58431b2965c6edb5aa7e610 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 21 Dec 2023 12:45:09 +0800 Subject: powerpc/mpc83xx: Use wait_event_freezable() for freezable kthread A freezable kernel thread can enter the frozen state during freezing by either calling try_to_freeze() or using wait_event_freezable() and its variants. So for the following snippet of code in a kernel thread loop: wait_event_interruptible(); try_to_freeze(); we can change it to a simple wait_event_freezable() and thereby eliminate a function call. Signed-off-by: Kevin Hao Signed-off-by: Michael Ellerman Link: https://msgid.link/20231221044510.1802429-3-haokexin@gmail.com --- arch/powerpc/platforms/83xx/suspend.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index eed325ed08cc..c9664e46b03d 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -264,8 +264,7 @@ static int agent_thread_fn(void *data) set_freezable(); while (1) { - wait_event_interruptible(agent_wq, pci_pm_state >= 2); - try_to_freeze(); + wait_event_freezable(agent_wq, pci_pm_state >= 2); if (signal_pending(current) || pci_pm_state < 2) continue; -- cgit v1.2.3 From ccc0f7b7673e63139ba9d916f4567d4fadb14b55 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 21 Dec 2023 12:45:10 +0800 Subject: powerpc/ps3: Add missing set_freezable() for ps3_probe_thread() The kernel thread function ps3_probe_thread() invokes try_to_freeze() in its loop. But all kernel threads are non-freezable by default, so if we want to make a kernel thread freezable, we have to invoke set_freezable() explicitly. Signed-off-by: Kevin Hao Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://msgid.link/20231221044510.1802429-4-haokexin@gmail.com --- arch/powerpc/platforms/ps3/device-init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index e87360a0fb40..878bc160246e 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -827,6 +827,7 @@ static int ps3_probe_thread(void *data) if (res) goto fail_free_irq; + set_freezable(); /* Loop here processing the requested notification events. */ do { try_to_freeze(); -- cgit v1.2.3 From eb8446e164572180c2cd0ea4e8494e4419202396 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 19 Dec 2023 14:52:36 +0530 Subject: powerpc/hvcall: Reorder Nestedv2 hcall opcodes Reorder the newly introduced hcall opcodes for Nestedv2 to follow the increasing-opcode-number convention followed in 'hvcall.h'. Also update the value for MAX_HCALL_OPCODE, which is used in various places in arch code for range checking.
Notably in the KVM enabled-hcall logic, and in hcall tracing. Fixes: 19d31c5f1157 ("KVM: PPC: Add support for nestedv2 guests") Suggested-by: Michael Ellerman Signed-off-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20231219092309.118151-1-vaibhav@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index ddb99e982917..a41e542ba94d 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -349,7 +349,16 @@ #define H_GET_ENERGY_SCALE_INFO 0x450 #define H_PKS_SIGNED_UPDATE 0x454 #define H_WATCHDOG 0x45C -#define MAX_HCALL_OPCODE H_WATCHDOG +#define H_GUEST_GET_CAPABILITIES 0x460 +#define H_GUEST_SET_CAPABILITIES 0x464 +#define H_GUEST_CREATE 0x470 +#define H_GUEST_CREATE_VCPU 0x474 +#define H_GUEST_GET_STATE 0x478 +#define H_GUEST_SET_STATE 0x47C +#define H_GUEST_RUN_VCPU 0x480 +#define H_GUEST_COPY_MEMORY 0x484 +#define H_GUEST_DELETE 0x488 +#define MAX_HCALL_OPCODE H_GUEST_DELETE /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -393,15 +402,6 @@ #define H_ENTER_NESTED 0xF804 #define H_TLB_INVALIDATE 0xF808 #define H_COPY_TOFROM_GUEST 0xF80C -#define H_GUEST_GET_CAPABILITIES 0x460 -#define H_GUEST_SET_CAPABILITIES 0x464 -#define H_GUEST_CREATE 0x470 -#define H_GUEST_CREATE_VCPU 0x474 -#define H_GUEST_GET_STATE 0x478 -#define H_GUEST_SET_STATE 0x47C -#define H_GUEST_RUN_VCPU 0x480 -#define H_GUEST_COPY_MEMORY 0x484 -#define H_GUEST_DELETE 0x488 /* Flags for H_SVM_PAGE_IN */ #define H_PAGE_IN_SHARED 0x1 -- cgit v1.2.3 From e6beb47edb89ca9dc8906515e2dfbeb5913312c8 Mon Sep 17 00:00:00 2001 From: Haoran Liu Date: Mon, 27 Nov 2023 06:41:08 -0800 Subject: powerpc/powernv: Add error handling to opal_prd_range_is_valid In the opal_prd_range_is_valid function within opal-prd.c, error handling was missing for the of_get_address call. This patch adds necessary error checking, ensuring that the function gracefully handles scenarios where of_get_address fails. Signed-off-by: Haoran Liu Signed-off-by: Michael Ellerman Link: https://msgid.link/20231127144108.29782-1-liuhaoran14@163.com --- arch/powerpc/platforms/powernv/opal-prd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 327e2f76905d..b66b06efcef1 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -66,6 +66,8 @@ static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size) const char *label; addrp = of_get_address(node, 0, &range_size, NULL); + if (!addrp) + continue; range_addr = of_read_number(addrp, 2); range_end = range_addr + range_size; -- cgit v1.2.3 From 5bb13e63cb00f0fdca5141f33d7a47bb26730a81 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 23 Nov 2023 14:29:02 +1100 Subject: powerpc/86xx: Drop unused CONFIG_MPC8610 The MPC8610 symbol used to be default y if MPC8610_HPCD, but since MPC8610_HPCD was removed MPC8610 is now never used. Remove it. 
Fixes: 248667f8bbde ("powerpc: drop HPCD/MPC8610 evaluation platform support") Signed-off-by: Michael Ellerman Link: https://msgid.link/20231123032902.2760818-1-mpe@ellerman.id.au --- arch/powerpc/platforms/86xx/Kconfig | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 67467cd6f34c..06b1e5c49d6f 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -52,10 +52,3 @@ config MPC8641 select MPIC default y if GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ || MVME7100 - -config MPC8610 - bool - select HAVE_PCI - select FSL_PCI if PCI - select PPC_UDBG_16550 - select MPIC -- cgit v1.2.3 From 482b718a84f08b6fc84879c3e90cc57dba11c115 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Sun, 24 Dec 2023 09:52:46 +0900 Subject: powerpc/ps3_defconfig: Disable PPC64_BIG_ENDIAN_ELF_ABI_V2 Commit 8c5fa3b5c4df ("powerpc/64: Make ELFv2 the default for big-endian builds"), merged in Linux-6.5-rc1, changes the calling ABI in a way that is incompatible with the current code for the PS3's LV1 hypervisor calls. This change just adds the line '# CONFIG_PPC64_BIG_ENDIAN_ELF_ABI_V2 is not set' to the ps3_defconfig file so that PPC64_ELF_ABI_V1 is used. Fixes run time errors like these: BUG: Kernel NULL pointer dereference at 0x00000000 Faulting instruction address: 0xc000000000047cf0 Oops: Kernel access of bad area, sig: 11 [#1] Call Trace: [c0000000023039e0] [c00000000100ebfc] ps3_create_spu+0xc4/0x2b0 (unreliable) [c000000002303ab0] [c00000000100d4c4] create_spu+0xcc/0x3c4 [c000000002303b40] [c00000000100eae4] ps3_enumerate_spus+0xa4/0xf8 Fixes: 8c5fa3b5c4df ("powerpc/64: Make ELFv2 the default for big-endian builds") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://msgid.link/df906ac1-5f17-44b9-b0bb-7cd292a0df65@infradead.org --- arch/powerpc/configs/ps3_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2b175ddf82f0..aa8bb0208bcc 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -24,6 +24,7 @@ CONFIG_PS3_VRAM=m CONFIG_PS3_LPM=m # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set CONFIG_KEXEC=y +# CONFIG_PPC64_BIG_ENDIAN_ELF_ABI_V2 is not set CONFIG_PPC_4K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_PM=y -- cgit v1.2.3
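For background on why the ABI switch breaks the PS3's hand-written LV1 call stubs: under the big-endian ELF ABI v1, the "address" of a function points at a function descriptor rather than at its first instruction, whereas ELFv2 points directly at the code. A rough sketch of the classic descriptor layout (the kernel carries a similar func_desc_t; this is illustrative, not the PS3 code):

	/* ELFv1 function descriptor, illustrative: */
	struct func_desc {
		unsigned long addr;	/* actual entry point of the code */
		unsigned long toc;	/* TOC base the callee expects in r2 */
		unsigned long env;	/* environment pointer (unused by C) */
	};

Assembly that assumes one convention computes wrong entry points under the other, which is consistent with the NULL pointer oops in the trace above.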