From 38f2c691a4b3e89d476f8e8350d1ca299974b89d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 17 May 2019 12:50:42 +0200 Subject: s390: improve wait logic of stop_machine The stop_machine loop to advance the state machine and to wait for all affected CPUs to check-in calls cpu_relax_yield in a tight loop until the last missing CPUs acknowledged the state transition. On a virtual system where not all logical CPUs are backed by real CPUs all the time it can take a while for all CPUs to check-in. With the current definition of cpu_relax_yield a diagnose 0x44 is done which tells the hypervisor to schedule *some* other CPU. That can be any CPU and not necessarily one of the CPUs that need to run in order to advance the state machine. This can lead to a pretty bad diagnose 0x44 storm until the last missing CPU finally checked-in. Replace the undirected cpu_relax_yield based on diagnose 0x44 with a directed yield. Each CPU in the wait loop will pick up the next CPU in the cpumask of stop_machine. The diagnose 0x9c is used to tell the hypervisor to run this next CPU instead of the current one. If there is only a limited number of real CPUs backing the virtual CPUs we end up with the real CPUs passed around in a round-robin fashion. [heiko.carstens@de.ibm.com]: Use cpumask_next_wrap as suggested by Peter Zijlstra. Signed-off-by: Martin Schwidefsky Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Signed-off-by: Heiko Carstens --- kernel/stop_machine.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2b5a6754646f..b8b0c5ff8da9 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -183,6 +183,7 @@ static int multi_cpu_stop(void *data) struct multi_stop_data *msdata = data; enum multi_stop_state curstate = MULTI_STOP_NONE; int cpu = smp_processor_id(), err = 0; + const struct cpumask *cpumask; unsigned long flags; bool is_active; @@ -192,15 +193,18 @@ static int multi_cpu_stop(void *data) */ local_save_flags(flags); - if (!msdata->active_cpus) - is_active = cpu == cpumask_first(cpu_online_mask); - else - is_active = cpumask_test_cpu(cpu, msdata->active_cpus); + if (!msdata->active_cpus) { + cpumask = cpu_online_mask; + is_active = cpu == cpumask_first(cpumask); + } else { + cpumask = msdata->active_cpus; + is_active = cpumask_test_cpu(cpu, cpumask); + } /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ - cpu_relax_yield(); + cpu_relax_yield(cpumask); if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { -- cgit v1.2.3 From 4ecf0a43e729a7e641d800c294faabe87378fc05 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 8 Jun 2019 12:13:57 +0200 Subject: processor: get rid of cpu_relax_yield stop_machine is the only user left of cpu_relax_yield. Given that it now has special semantics which are tied to stop_machine introduce a weak stop_machine_yield function which architectures can override, and get rid of the generic cpu_relax_yield implementation. Acked-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Signed-off-by: Heiko Carstens --- arch/s390/include/asm/processor.h | 6 ------ arch/s390/kernel/processor.c | 4 ++-- include/linux/sched.h | 4 ---- include/linux/stop_machine.h | 1 + kernel/stop_machine.c | 7 ++++++- 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 445ce9ee4404..14883b1562e0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -222,12 +222,6 @@ static __no_kasan_or_inline unsigned short stap(void) return cpu_address; } -/* - * Give up the time slice of the virtual PU. - */ -#define cpu_relax_yield cpu_relax_yield -void cpu_relax_yield(const struct cpumask *cpumask); - #define cpu_relax() barrier() #define ECAG_CACHE_ATTRIBUTE 0 diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 4cdaefec1b7c..6ebc2117c66c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include #include #include #include @@ -59,7 +60,7 @@ void s390_update_cpu_mhz(void) on_each_cpu(update_cpu_mhz, NULL, 0); } -void notrace cpu_relax_yield(const struct cpumask *cpumask) +void notrace stop_machine_yield(const struct cpumask *cpumask) { int cpu, this_cpu; @@ -73,7 +74,6 @@ void notrace cpu_relax_yield(const struct cpumask *cpumask) smp_yield_cpu(cpu); } } -EXPORT_SYMBOL(cpu_relax_yield); /* * cpu_init - initializes state that is per-CPU. diff --git a/include/linux/sched.h b/include/linux/sched.h index 1f9f3160da7e..911675416b05 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1518,10 +1518,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma } #endif -#ifndef cpu_relax_yield -#define cpu_relax_yield(cpumask) cpu_relax() -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 6d3635c86dbe..f9a0c6189852 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -36,6 +36,7 @@ int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); void stop_machine_park(int cpu); void stop_machine_unpark(int cpu); +void stop_machine_yield(const struct cpumask *cpumask); #else /* CONFIG_SMP */ diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index b8b0c5ff8da9..b4f83f7bdf86 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -177,6 +177,11 @@ static void ack_state(struct multi_stop_data *msdata) set_state(msdata, msdata->state + 1); } +void __weak stop_machine_yield(const struct cpumask *cpumask) +{ + cpu_relax(); +} + /* This is the cpu_stop function which stops the CPU. */ static int multi_cpu_stop(void *data) { @@ -204,7 +209,7 @@ static int multi_cpu_stop(void *data) /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ - cpu_relax_yield(cpumask); + stop_machine_yield(cpumask); if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { -- cgit v1.2.3