From 1ea6c46a23f1213d1972bfae220db5c165e27bba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 May 2017 15:59:54 +0200 Subject: sched/fair: Propagate an effective runnable_load_avg The load balancer uses runnable_load_avg as load indicator. For !cgroup this is: runnable_load_avg = \Sum se->avg.load_avg ; where se->on_rq That is, a direct sum of all runnable tasks on that runqueue. As opposed to load_avg, which is a sum of all tasks on the runqueue, which includes a blocked component. However, in the cgroup case, this comes apart since the group entities are always runnable, even if most of their constituent entities are blocked. Therefore introduce a runnable_weight which for task entities is the same as the regular weight, but for group entities is a fraction of the entity weight and represents the runnable part of the group runqueue. Then propagate this load through the PELT hierarchy to arrive at an effective runnable load avgerage -- which we should not confuse with the canonical runnable load average. Suggested-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a7df4e558c..bdd6ad6fcce1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -331,9 +331,11 @@ struct load_weight { struct sched_avg { u64 last_update_time; u64 load_sum; + u64 runnable_load_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; + unsigned long runnable_load_avg; unsigned long util_avg; }; @@ -376,6 +378,7 @@ struct sched_statistics { struct sched_entity { /* For load-balancing: */ struct load_weight load; + unsigned long runnable_weight; struct rb_node run_node; struct list_head group_node; unsigned int on_rq; -- cgit v1.2.3 From 1d48b080bcce0a5e7d7aa2dbcdb35deefc188c3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Sep 2017 13:50:16 +0200 Subject: sched/debug: Rename task-state printing helpers Steve requested better names for the new task-state helper functions. So introduce the concept of task-state index for the printing and rename __get_task_state() to task_state_index() and __task_state_to_char() to task_index_to_char(). Requested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929115016.pzlqc7ss3ccystyg@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- fs/proc/array.c | 2 +- include/linux/sched.h | 6 +++--- include/trace/events/sched.h | 2 +- kernel/trace/trace_output.c | 12 ++++++------ kernel/trace/trace_sched_wakeup.c | 8 ++++---- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/proc/array.c b/fs/proc/array.c index 77a8eacbe032..1b5406ca8e4c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -137,7 +137,7 @@ static const char * const task_state_array[] = { static inline const char *get_task_state(struct task_struct *tsk) { BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array)); - return task_state_array[__get_task_state(tsk)]; + return task_state_array[task_state_index(tsk)]; } static inline int get_task_umask(struct task_struct *tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index bdd6ad6fcce1..33a01f4deb00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1248,7 +1248,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int __get_task_state(struct task_struct *tsk) +static inline unsigned int task_state_index(struct task_struct *tsk) { unsigned int tsk_state = READ_ONCE(tsk->state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; @@ -1261,7 +1261,7 @@ static inline unsigned int __get_task_state(struct task_struct *tsk) return fls(state); } -static inline char __task_state_to_char(unsigned int state) +static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; @@ -1272,7 +1272,7 @@ static inline char __task_state_to_char(unsigned int state) static inline char task_state_to_char(struct task_struct *tsk) { - return __task_state_to_char(__get_task_state(tsk)); + return task_index_to_char(task_state_index(tsk)); } /** diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 3c8b7f625670..fab74a12ca0f 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -117,7 +117,7 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * if (preempt) return TASK_STATE_MAX; - return __get_task_state(p); + return task_state_index(p); } #endif /* CREATE_TRACE_POINTS */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c738e764e2a5..90db994ac900 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -921,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, trace_assign_type(field, iter->ent); - T = __task_state_to_char(field->next_state); - S = __task_state_to_char(field->prev_state); + T = task_index_to_char(field->next_state); + S = task_index_to_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); trace_seq_printf(&iter->seq, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", @@ -957,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - S = __task_state_to_char(field->prev_state); - T = __task_state_to_char(field->next_state); + S = task_index_to_char(field->prev_state); + T = task_index_to_char(field->next_state); trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", field->prev_pid, field->prev_prio, @@ -993,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) trace_assign_type(field, iter->ent); if (!S) - S = __task_state_to_char(field->prev_state); - T = __task_state_to_char(field->next_state); + S = task_index_to_char(field->prev_state); + T = task_index_to_char(field->next_state); SEQ_PUT_HEX_FIELD(s, field->prev_pid); SEQ_PUT_HEX_FIELD(s, field->prev_prio); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0c331978b1a6..500f370d3bb1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; - entry->prev_state = __get_task_state(prev); + entry->prev_state = task_state_index(prev); entry->next_pid = next->pid; entry->next_prio = next->prio; - entry->next_state = __get_task_state(next); + entry->next_state = task_state_index(next); entry->next_cpu = task_cpu(next); if (!call_filter_check_discard(call, entry, buffer, event)) @@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; - entry->prev_state = __get_task_state(curr); + entry->prev_state = task_state_index(curr); entry->next_pid = wakee->pid; entry->next_prio = wakee->prio; - entry->next_state = __get_task_state(wakee); + entry->next_state = task_state_index(wakee); entry->next_cpu = task_cpu(wakee); if (!call_filter_check_discard(call, entry, buffer, event)) -- cgit v1.2.3 From 799ba82de01e7543f6b2042e1a739f3a20255f23 Mon Sep 17 00:00:00 2001 From: luca abeni Date: Thu, 7 Sep 2017 12:09:31 +0200 Subject: sched/deadline: Use C bitfields for the state flags Ask the compiler to use a single bit for storing true / false values, instead of wasting the size of a whole int value. Tested with gcc 5.4.0 on x86_64, and the compiler produces the expected Assembly (similar to the Assembly code generated when explicitly accessing the bits with bitmasks, "&" and "|"). Signed-off-by: luca abeni Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1504778971-13573-5-git-send-email-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33a01f4deb00..0f897dfc195e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -474,10 +474,10 @@ struct sched_dl_entity { * conditions between the inactive timer handler and the wakeup * code. */ - int dl_throttled; - int dl_boosted; - int dl_yielded; - int dl_non_contending; + int dl_throttled : 1; + int dl_boosted : 1; + int dl_yielded : 1; + int dl_non_contending : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From ff0d4a9dc16b1f4c954f6407c233ab848bdfe8b0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 17:49:00 +0200 Subject: sched/rt: Add a helper to test for a RT task This helper returns true if a task has elevated priority which is true for RT tasks (SCHED_RR and SCHED_FIFO) and also for SCHED_DEADLINE. A task which runs at RT priority due to PI-boosting is not considered as one with elevated priority. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171004154901.26904-1-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched/rt.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index f93329aba31a..133001627ba1 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -17,6 +17,17 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } +static inline bool task_is_realtime(struct task_struct *tsk) +{ + int policy = tsk->policy; + + if (policy == SCHED_FIFO || policy == SCHED_RR) + return true; + if (policy == SCHED_DEADLINE) + return true; + return false; +} + #ifdef CONFIG_RT_MUTEXES /* * Must hold either p->pi_lock or task_rq(p)->lock. -- cgit v1.2.3 From 36436440cd19f59f5be12a1b181d299af2725140 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 17:49:01 +0200 Subject: block/ioprio: Use a helper to check for RT prio A side-effect to the old code is that now SCHED_DEADLINE is also recognized. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171004154901.26904-2-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/ioprio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 8c1239020d79..2f19aab84a4a 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -2,6 +2,7 @@ #define IOPRIO_H #include +#include #include /* @@ -62,7 +63,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR) + else if (task_is_realtime(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; -- cgit v1.2.3 From e22cdc3fc5991956146b9856d36b4971fe54dcd6 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Mon, 23 Oct 2017 19:01:54 +0600 Subject: sched/isolcpus: Fix "isolcpus=" boot parameter handling when !CONFIG_CPUMASK_OFFSTACK cpulist_parse() uses nr_cpumask_bits as a limit to parse the passed buffer from kernel commandline. What nr_cpumask_bits represents varies depending upon the CONFIG_CPUMASK_OFFSTACK option: - If CONFIG_CPUMASK_OFFSTACK=n, then nr_cpumask_bits is the same as NR_CPUS, which might not represent the # of CPUs that really exist (default 64). So, there's a chance of a gap between nr_cpu_ids and NR_CPUS, which ultimately lead towards invalid cpulist_parse() operation. For example, if isolcpus=9 is passed on an 8 cpu system (CONFIG_CPUMASK_OFFSTACK=n) it doesn't show the error that it's supposed to. This patch fixes this bug by finding the last CPU of the passed isolcpus= list and checking it against nr_cpu_ids. It also fixes the error message where the nr_cpu_ids should be nr_cpu_ids-1, since CPU numbering starts from 0. Signed-off-by: Rakib Mullick Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adobriyan@gmail.com Cc: akpm@linux-foundation.org Cc: longman@redhat.com Cc: mka@chromium.org Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20171023130154.9050-1-rakib.mullick@gmail.com [ Enhanced the changelog and the kernel message. ] Signed-off-by: Ingo Molnar include/linux/cpumask.h | 16 ++++++++++++++++ kernel/sched/topology.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) --- include/linux/cpumask.h | 16 ++++++++++++++++ kernel/sched/topology.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cd415b733c2a..63661de67ad4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -130,6 +130,11 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return 0; +} + /* Valid inputs for n are -1 and 0. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { @@ -178,6 +183,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_last - get the last CPU in a cpumask + * @srcp: - the cpumask pointer + * + * Returns >= nr_cpumask_bits if no CPUs set. + */ +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + unsigned int cpumask_next(int n, const struct cpumask *srcp); /** diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index e50450c2fed8..e3d31b0880dc 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -476,8 +476,8 @@ static int __init isolated_cpu_setup(char *str) alloc_bootmem_cpumask_var(&cpu_isolated_map); ret = cpulist_parse(str, cpu_isolated_map); - if (ret) { - pr_err("sched: Error, all isolcpus= values must be between 0 and %u\n", nr_cpu_ids); + if (ret || cpumask_last(cpu_isolated_map) >= nr_cpu_ids) { + pr_err("sched: Error, all isolcpus= values must be between 0 and %u - ignoring them.\n", nr_cpu_ids-1); return 0; } return 1; -- cgit v1.2.3 From 7863406143d8bbbbda07a61285c5f4c217908dfd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:28 +0200 Subject: sched/isolation: Move housekeeping related code to its own file The housekeeping code is currently tied to the NOHZ code. As we are planning to make housekeeping independent from it, start with moving the relevant code to its own file. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Acked-by: Paul E. McKenney Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-2-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- drivers/net/ethernet/tile/tilegx.c | 2 +- include/linux/sched/isolation.h | 56 ++++++++++++++++++++++++++++++++++++++ include/linux/tick.h | 37 ------------------------- init/main.c | 2 ++ kernel/rcu/tree_plugin.h | 1 + kernel/rcu/update.c | 1 + kernel/sched/Makefile | 1 + kernel/sched/core.c | 1 + kernel/sched/fair.c | 1 + kernel/sched/isolation.c | 33 ++++++++++++++++++++++ kernel/time/tick-sched.c | 18 ------------ kernel/watchdog.c | 1 + 12 files changed, 98 insertions(+), 56 deletions(-) create mode 100644 include/linux/sched/isolation.h create mode 100644 kernel/sched/isolation.c (limited to 'include') diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index c00102b8145a..27a3272dcd48 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h new file mode 100644 index 000000000000..b7cfbc46286c --- /dev/null +++ b/include/linux/sched/isolation.h @@ -0,0 +1,56 @@ +#ifndef _LINUX_SCHED_ISOLATION_H +#define _LINUX_SCHED_ISOLATION_H + +#include +#include +#include + +#ifdef CONFIG_NO_HZ_FULL +extern cpumask_var_t housekeeping_mask; + +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + +extern void __init housekeeping_init(void); + +#else + +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} + +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ + + +static inline const struct cpumask *housekeeping_cpumask(void) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return housekeeping_mask; +#endif + return cpu_possible_mask; +} + +static inline bool is_housekeeping_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return cpumask_test_cpu(cpu, housekeeping_mask); +#endif + return true; +} + +static inline void housekeeping_affine(struct task_struct *t) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + set_cpus_allowed_ptr(t, housekeeping_mask); + +#endif +} + +#endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index fe01e68bf520..68afc09aa8ac 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -137,7 +137,6 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; extern cpumask_var_t tick_nohz_full_mask; -extern cpumask_var_t housekeeping_mask; static inline bool tick_nohz_full_enabled(void) { @@ -161,11 +160,6 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } -static inline int housekeeping_any_cpu(void) -{ - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -} - extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); @@ -235,10 +229,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); #else -static inline int housekeeping_any_cpu(void) -{ - return smp_processor_id(); -} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } @@ -260,33 +250,6 @@ static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } #endif -static inline const struct cpumask *housekeeping_cpumask(void) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return housekeeping_mask; -#endif - return cpu_possible_mask; -} - -static inline bool is_housekeeping_cpu(int cpu) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return cpumask_test_cpu(cpu, housekeeping_mask); -#endif - return true; -} - -static inline void housekeeping_affine(struct task_struct *t) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - set_cpus_allowed_ptr(t, housekeeping_mask); - -#endif -} - static inline void tick_nohz_task_switch(void) { if (tick_nohz_full_enabled()) diff --git a/init/main.c b/init/main.c index 0ee9c6866ada..4610c99ae306 100644 --- a/init/main.c +++ b/init/main.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -606,6 +607,7 @@ asmlinkage __visible void __init start_kernel(void) early_irq_init(); init_IRQ(); tick_init(); + housekeeping_init(); rcu_init_nohz(); init_timers(); hrtimers_init(); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e012b9be777e..c7632f51c5a0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "../time/tick-internal.h" diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5033b66d2753..79abeb0ca329 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -51,6 +51,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 78f54932ea1d..871d43d73375 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -26,3 +26,4 @@ obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o +obj-$(CONFIG_NO_HZ_FULL) += isolation.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2288a145bf01..ad188acb7636 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 56f343b8e749..591481db8c6a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -32,6 +32,7 @@ #include #include #include +#include #include diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c new file mode 100644 index 000000000000..3589252ed476 --- /dev/null +++ b/kernel/sched/isolation.c @@ -0,0 +1,33 @@ +/* + * Housekeeping management. Manage the targets for routine code that can run on + * any CPU: unbound workqueues, timers, kthreads and any offloadable work. + * + * Copyright (C) 2017 Red Hat, Inc., Frederic Weisbecker + * + */ + +#include +#include +#include +#include + +cpumask_var_t housekeeping_mask; + +void __init housekeeping_init(void) +{ + if (!tick_nohz_full_enabled()) + return; + + if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) { + WARN(1, "NO_HZ: Can't allocate not-full dynticks cpumask\n"); + cpumask_clear(tick_nohz_full_mask); + tick_nohz_full_running = false; + return; + } + + cpumask_andnot(housekeeping_mask, + cpu_possible_mask, tick_nohz_full_mask); + + /* We need at least one CPU to handle housekeeping work */ + WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); +} diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7b258c59d78a..27d7d522ac4e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -166,7 +166,6 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; -cpumask_var_t housekeeping_mask; bool tick_nohz_full_running; static atomic_t tick_dep_mask; @@ -438,13 +437,6 @@ void __init tick_nohz_init(void) return; } - if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) { - WARN(1, "NO_HZ: Can't allocate not-full dynticks cpumask\n"); - cpumask_clear(tick_nohz_full_mask); - tick_nohz_full_running = false; - return; - } - /* * Full dynticks uses irq work to drive the tick rescheduling on safe * locking contexts. But then we need irq work to raise its own @@ -453,7 +445,6 @@ void __init tick_nohz_init(void) if (!arch_irq_work_has_interrupt()) { pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n"); cpumask_clear(tick_nohz_full_mask); - cpumask_copy(housekeeping_mask, cpu_possible_mask); tick_nohz_full_running = false; return; } @@ -466,9 +457,6 @@ void __init tick_nohz_init(void) cpumask_clear_cpu(cpu, tick_nohz_full_mask); } - cpumask_andnot(housekeeping_mask, - cpu_possible_mask, tick_nohz_full_mask); - for_each_cpu(cpu, tick_nohz_full_mask) context_tracking_cpu_set(cpu); @@ -478,12 +466,6 @@ void __init tick_nohz_init(void) WARN_ON(ret < 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); - - /* - * We need at least one CPU to handle housekeeping work such - * as timekeeping, unbound timers, workqueues, ... - */ - WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } #endif diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6bcb854909c0..3c44dbaa4b9f 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 9f0ca2d97ef0b5e966be2cfef26c7c094ec14e41 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:30 +0200 Subject: sched/isolation: Provide a dynamic off-case to housekeeping_any_cpu() housekeeping_any_cpu() doesn't handle correctly the case where CONFIG_NO_HZ_FULL=y and no CPU is in nohz_full mode. So far no caller needs this but let's prepare to avoid any future surprise. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-4-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index b7cfbc46286c..040df04fa78a 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -7,25 +7,20 @@ #ifdef CONFIG_NO_HZ_FULL extern cpumask_var_t housekeeping_mask; - -static inline int housekeeping_any_cpu(void) -{ - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -} - extern void __init housekeeping_init(void); - #else +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ static inline int housekeeping_any_cpu(void) { +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +#endif return smp_processor_id(); } -static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ - - static inline const struct cpumask *housekeeping_cpumask(void) { #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 7e56a1cf4b28f5739526877b8dbad623fae2e4e7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:31 +0200 Subject: sched/isolation: Make the housekeeping cpumask private Nobody needs to access this detail. housekeeping_cpumask() already takes care of it. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-5-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 31 ++++++++++--------------------- kernel/sched/isolation.c | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 040df04fa78a..ed935ffc6ffa 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -6,46 +6,35 @@ #include #ifdef CONFIG_NO_HZ_FULL -extern cpumask_var_t housekeeping_mask; +extern int housekeeping_any_cpu(void); +extern const struct cpumask *housekeeping_cpumask(void); +extern void housekeeping_affine(struct task_struct *t); +extern bool housekeeping_test_cpu(int cpu); extern void __init housekeeping_init(void); + #else -static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ static inline int housekeeping_any_cpu(void) { -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -#endif return smp_processor_id(); } static inline const struct cpumask *housekeeping_cpumask(void) { -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return housekeeping_mask; -#endif return cpu_possible_mask; } +static inline void housekeeping_affine(struct task_struct *t) { } +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ + static inline bool is_housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_enabled()) - return cpumask_test_cpu(cpu, housekeeping_mask); + return housekeeping_test_cpu(cpu); #endif return true; } -static inline void housekeeping_affine(struct task_struct *t) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - set_cpus_allowed_ptr(t, housekeeping_mask); - -#endif -} - #endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 3589252ed476..16445097eb25 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -11,7 +11,41 @@ #include #include -cpumask_var_t housekeeping_mask; +static cpumask_var_t housekeeping_mask; + +int housekeeping_any_cpu(void) +{ + if (tick_nohz_full_enabled()) + return cpumask_any_and(housekeeping_mask, cpu_online_mask); + + return smp_processor_id(); +} +EXPORT_SYMBOL_GPL(housekeeping_any_cpu); + +const struct cpumask *housekeeping_cpumask(void) +{ + if (tick_nohz_full_enabled()) + return housekeeping_mask; + + return cpu_possible_mask; +} +EXPORT_SYMBOL_GPL(housekeeping_cpumask); + +void housekeeping_affine(struct task_struct *t) +{ + if (tick_nohz_full_enabled()) + set_cpus_allowed_ptr(t, housekeeping_mask); +} +EXPORT_SYMBOL_GPL(housekeeping_affine); + +bool housekeeping_test_cpu(int cpu) +{ + if (tick_nohz_full_enabled()) + return cpumask_test_cpu(cpu, housekeeping_mask); + + return true; +} +EXPORT_SYMBOL_GPL(housekeeping_test_cpu); void __init housekeeping_init(void) { -- cgit v1.2.3 From e179f5a04ba46ee5c5439480c2bfd68c358168b7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:32 +0200 Subject: sched/isolation: Use its own static key Housekeeping code still depends on the nohz_full static key. Since we want to decouple housekeeping from NOHZ, let's create a housekeeping specific static key. It's mostly relevant for calls to is_housekeeping_cpu() from the scheduler. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-6-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 3 ++- kernel/sched/isolation.c | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index ed935ffc6ffa..194c586fbb12 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -6,6 +6,7 @@ #include #ifdef CONFIG_NO_HZ_FULL +DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); extern int housekeeping_any_cpu(void); extern const struct cpumask *housekeeping_cpumask(void); extern void housekeeping_affine(struct task_struct *t); @@ -31,7 +32,7 @@ static inline void housekeeping_init(void) { } static inline bool is_housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) + if (static_branch_unlikely(&housekeeping_overriden)) return housekeeping_test_cpu(cpu); #endif return true; diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 16445097eb25..bb8ba19a0235 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -10,12 +10,15 @@ #include #include #include +#include +DEFINE_STATIC_KEY_FALSE(housekeeping_overriden); +EXPORT_SYMBOL_GPL(housekeeping_overriden); static cpumask_var_t housekeeping_mask; int housekeeping_any_cpu(void) { - if (tick_nohz_full_enabled()) + if (static_branch_unlikely(&housekeeping_overriden)) return cpumask_any_and(housekeeping_mask, cpu_online_mask); return smp_processor_id(); @@ -24,7 +27,7 @@ EXPORT_SYMBOL_GPL(housekeeping_any_cpu); const struct cpumask *housekeeping_cpumask(void) { - if (tick_nohz_full_enabled()) + if (static_branch_unlikely(&housekeeping_overriden)) return housekeeping_mask; return cpu_possible_mask; @@ -33,14 +36,14 @@ EXPORT_SYMBOL_GPL(housekeeping_cpumask); void housekeeping_affine(struct task_struct *t) { - if (tick_nohz_full_enabled()) + if (static_branch_unlikely(&housekeeping_overriden)) set_cpus_allowed_ptr(t, housekeeping_mask); } EXPORT_SYMBOL_GPL(housekeeping_affine); bool housekeeping_test_cpu(int cpu) { - if (tick_nohz_full_enabled()) + if (static_branch_unlikely(&housekeeping_overriden)) return cpumask_test_cpu(cpu, housekeeping_mask); return true; @@ -62,6 +65,8 @@ void __init housekeeping_init(void) cpumask_andnot(housekeeping_mask, cpu_possible_mask, tick_nohz_full_mask); + static_branch_enable(&housekeeping_overriden); + /* We need at least one CPU to handle housekeeping work */ WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } -- cgit v1.2.3 From 204c083a009378dfa751175b5fcddc75988bab6c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:33 +0200 Subject: sched/isolation: Rename is_housekeeping_cpu() to housekeeping_cpu() Fit it into the housekeeping_*() namespace. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-7-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 2 +- kernel/sched/core.c | 6 +++--- kernel/sched/fair.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 194c586fbb12..ad0f5d986a2e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -29,7 +29,7 @@ static inline void housekeeping_affine(struct task_struct *t) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_NO_HZ_FULL */ -static inline bool is_housekeeping_cpu(int cpu) +static inline bool housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (static_branch_unlikely(&housekeeping_overriden)) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ad188acb7636..d0fb448dd43a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -527,7 +527,7 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(); struct sched_domain *sd; - if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) + if (!idle_cpu(cpu) && housekeeping_cpu(cpu)) return cpu; rcu_read_lock(); @@ -536,14 +536,14 @@ int get_nohz_timer_target(void) if (cpu == i) continue; - if (!idle_cpu(i) && is_housekeeping_cpu(i)) { + if (!idle_cpu(i) && housekeeping_cpu(i)) { cpu = i; goto unlock; } } } - if (!is_housekeeping_cpu(cpu)) + if (!housekeeping_cpu(cpu)) cpu = housekeeping_any_cpu(); unlock: rcu_read_unlock(); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 591481db8c6a..cdece8f967f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9027,7 +9027,7 @@ void nohz_balance_enter_idle(int cpu) return; /* Spare idle load balancing on CPUs that don't want to be disturbed: */ - if (!is_housekeeping_cpu(cpu)) + if (!housekeeping_cpu(cpu)) return; if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) -- cgit v1.2.3 From 5c4991e24c69737bd41fc2737b1e3980abbf73f9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:34 +0200 Subject: sched/isolation: Split out new CONFIG_CPU_ISOLATION=y config from CONFIG_NO_HZ_FULL Split the housekeeping config from CONFIG_NO_HZ_FULL. This way we finally separate the isolation code from NOHZ. Although a dependency to CONFIG_NO_HZ_FULL remains for now, while the housekeeping code still deals with NOHZ internals. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-8-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 6 +++--- init/Kconfig | 8 ++++++++ kernel/sched/Makefile | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index ad0f5d986a2e..93ac2367a520 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,7 +5,7 @@ #include #include -#ifdef CONFIG_NO_HZ_FULL +#ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); extern int housekeeping_any_cpu(void); extern const struct cpumask *housekeeping_cpumask(void); @@ -27,11 +27,11 @@ static inline const struct cpumask *housekeeping_cpumask(void) static inline void housekeeping_affine(struct task_struct *t) { } static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ +#endif /* CONFIG_CPU_ISOLATION */ static inline bool housekeeping_cpu(int cpu) { -#ifdef CONFIG_NO_HZ_FULL +#ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overriden)) return housekeeping_test_cpu(cpu); #endif diff --git a/init/Kconfig b/init/Kconfig index 78cb2461012e..6f52e6f4bd9d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -472,6 +472,14 @@ config TASK_IO_ACCOUNTING endmenu # "CPU/Task time and stats accounting" +config CPU_ISOLATION + bool "CPU isolation" + depends on NO_HZ_FULL + help + Make sure that CPUs running critical tasks are not disturbed by + any source of "noise" such as unbound workqueues, timers, kthreads... + Unbound jobs get offloaded to housekeeping CPUs. + source "kernel/rcu/Kconfig" config BUILD_BIN2C diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 871d43d73375..dbe61302c352 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -26,4 +26,4 @@ obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o -obj-$(CONFIG_NO_HZ_FULL) += isolation.o +obj-$(CONFIG_CPU_ISOLATION) += isolation.o -- cgit v1.2.3 From de201559df872f83d0c08fb4effe3efd28e6cbc8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:35 +0200 Subject: sched/isolation: Introduce housekeeping flags Before we implement isolcpus under housekeeping, we need the isolation features to be more finegrained. For example some people want NOHZ_FULL without the full scheduler isolation, others want full scheduler isolation without NOHZ_FULL. So let's cut all these isolation features piecewise, at the risk of overcutting it right now. We can still merge some flags later if they always make sense together. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-9-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- drivers/net/ethernet/tile/tilegx.c | 4 ++-- include/linux/sched/isolation.h | 26 +++++++++++++++++--------- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 2 +- kernel/sched/core.c | 8 ++++---- kernel/sched/fair.c | 2 +- kernel/sched/isolation.c | 26 +++++++++++++++----------- kernel/watchdog.c | 3 ++- 8 files changed, 43 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 27a3272dcd48..b3e5816a4678 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -2270,8 +2270,8 @@ static int __init tile_net_init_module(void) tile_net_dev_init(name, mac); if (!network_cpus_init()) - cpumask_and(&network_cpus_map, housekeeping_cpumask(), - cpu_online_mask); + cpumask_and(&network_cpus_map, + housekeeping_cpumask(HK_FLAG_MISC), cpu_online_mask); return 0; } diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 93ac2367a520..9bb753eece3b 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,35 +5,43 @@ #include #include +enum hk_flags { + HK_FLAG_TIMER = 1, + HK_FLAG_RCU = (1 << 1), + HK_FLAG_MISC = (1 << 2), + HK_FLAG_SCHED = (1 << 3), +}; + #ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); -extern int housekeeping_any_cpu(void); -extern const struct cpumask *housekeeping_cpumask(void); -extern void housekeeping_affine(struct task_struct *t); -extern bool housekeeping_test_cpu(int cpu); +extern int housekeeping_any_cpu(enum hk_flags flags); +extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); +extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); +extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); extern void __init housekeeping_init(void); #else -static inline int housekeeping_any_cpu(void) +static inline int housekeeping_any_cpu(enum hk_flags flags) { return smp_processor_id(); } -static inline const struct cpumask *housekeeping_cpumask(void) +static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) { return cpu_possible_mask; } -static inline void housekeeping_affine(struct task_struct *t) { } +static inline void housekeeping_affine(struct task_struct *t, + enum hk_flags flags) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ -static inline bool housekeeping_cpu(int cpu) +static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) { #ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overriden)) - return housekeeping_test_cpu(cpu); + return housekeeping_test_cpu(cpu, flags); #endif return true; } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c7632f51c5a0..34125d23e58a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2584,7 +2584,7 @@ static void rcu_bind_gp_kthread(void) if (!tick_nohz_full_enabled()) return; - housekeeping_affine(current); + housekeeping_affine(current, HK_FLAG_RCU); } /* Record the current task on dyntick-idle entry. */ diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 79abeb0ca329..e3e60efaafee 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -719,7 +719,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) LIST_HEAD(rcu_tasks_holdouts); /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ - housekeeping_affine(current); + housekeeping_affine(current, HK_FLAG_RCU); /* * Each pass through the following loop makes one check for diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d0fb448dd43a..2210c0203e51 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -527,7 +527,7 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(); struct sched_domain *sd; - if (!idle_cpu(cpu) && housekeeping_cpu(cpu)) + if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) return cpu; rcu_read_lock(); @@ -536,15 +536,15 @@ int get_nohz_timer_target(void) if (cpu == i) continue; - if (!idle_cpu(i) && housekeeping_cpu(i)) { + if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) { cpu = i; goto unlock; } } } - if (!housekeeping_cpu(cpu)) - cpu = housekeeping_any_cpu(); + if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) + cpu = housekeeping_any_cpu(HK_FLAG_TIMER); unlock: rcu_read_unlock(); return cpu; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cdece8f967f0..f755de86a813 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9027,7 +9027,7 @@ void nohz_balance_enter_idle(int cpu) return; /* Spare idle load balancing on CPUs that don't want to be disturbed: */ - if (!housekeeping_cpu(cpu)) + if (!housekeeping_cpu(cpu, HK_FLAG_SCHED)) return; if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index bb8ba19a0235..37a138a780ff 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -15,37 +15,39 @@ DEFINE_STATIC_KEY_FALSE(housekeeping_overriden); EXPORT_SYMBOL_GPL(housekeeping_overriden); static cpumask_var_t housekeeping_mask; +static unsigned int housekeeping_flags; -int housekeeping_any_cpu(void) +int housekeeping_any_cpu(enum hk_flags flags) { if (static_branch_unlikely(&housekeeping_overriden)) - return cpumask_any_and(housekeeping_mask, cpu_online_mask); - + if (housekeeping_flags & flags) + return cpumask_any_and(housekeeping_mask, cpu_online_mask); return smp_processor_id(); } EXPORT_SYMBOL_GPL(housekeeping_any_cpu); -const struct cpumask *housekeeping_cpumask(void) +const struct cpumask *housekeeping_cpumask(enum hk_flags flags) { if (static_branch_unlikely(&housekeeping_overriden)) - return housekeeping_mask; - + if (housekeeping_flags & flags) + return housekeeping_mask; return cpu_possible_mask; } EXPORT_SYMBOL_GPL(housekeeping_cpumask); -void housekeeping_affine(struct task_struct *t) +void housekeeping_affine(struct task_struct *t, enum hk_flags flags) { if (static_branch_unlikely(&housekeeping_overriden)) - set_cpus_allowed_ptr(t, housekeeping_mask); + if (housekeeping_flags & flags) + set_cpus_allowed_ptr(t, housekeeping_mask); } EXPORT_SYMBOL_GPL(housekeeping_affine); -bool housekeeping_test_cpu(int cpu) +bool housekeeping_test_cpu(int cpu, enum hk_flags flags) { if (static_branch_unlikely(&housekeeping_overriden)) - return cpumask_test_cpu(cpu, housekeeping_mask); - + if (housekeeping_flags & flags) + return cpumask_test_cpu(cpu, housekeeping_mask); return true; } EXPORT_SYMBOL_GPL(housekeeping_test_cpu); @@ -65,6 +67,8 @@ void __init housekeeping_init(void) cpumask_andnot(housekeeping_mask, cpu_possible_mask, tick_nohz_full_mask); + housekeeping_flags = HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC; + static_branch_enable(&housekeeping_overriden); /* We need at least one CPU to handle housekeeping work */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 562652c9c815..e9e2ebb3db29 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -777,7 +777,8 @@ void __init lockup_detector_init(void) if (tick_nohz_full_enabled()) pr_info("Disabling watchdog on nohz_full cores by default\n"); - cpumask_copy(&watchdog_cpumask, housekeeping_cpumask()); + cpumask_copy(&watchdog_cpumask, + housekeeping_cpumask(HK_FLAG_TIMER)); if (!watchdog_nmi_probe()) nmi_watchdog_available = true; -- cgit v1.2.3 From 6f1982fedd59856bcc42a9b521be4c3ffd2f60a7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:36 +0200 Subject: sched/isolation: Handle the nohz_full= parameter We want to centralize the isolation management, done by the housekeeping subsystem. Therefore we need to handle the nohz_full= parameter from there. Since nohz_full= so far has involved unbound timers, watchdog, RCU and tilegx NAPI isolation, we keep that default behaviour. nohz_full= will be deprecated in the future. We want to control the isolation features from the isolcpus= parameter. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-10-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 1 + include/linux/tick.h | 2 ++ init/Kconfig | 1 - kernel/sched/isolation.c | 42 +++++++++++++++++++++++++++++------------ kernel/time/tick-sched.c | 13 +++---------- 5 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 9bb753eece3b..e53cfa96e91e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -10,6 +10,7 @@ enum hk_flags { HK_FLAG_RCU = (1 << 1), HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), + HK_FLAG_TICK = (1 << 4), }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/tick.h b/include/linux/tick.h index 68afc09aa8ac..e2a163a9f96c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -228,6 +228,7 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); +extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } @@ -248,6 +249,7 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } +static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { } #endif static inline void tick_nohz_task_switch(void) diff --git a/init/Kconfig b/init/Kconfig index 6f52e6f4bd9d..f8564df58d0a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -474,7 +474,6 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" - depends on NO_HZ_FULL help Make sure that CPUs running critical tasks are not disturbed by any source of "noise" such as unbound workqueues, timers, kthreads... diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 37a138a780ff..1f61e440358d 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -54,23 +54,41 @@ EXPORT_SYMBOL_GPL(housekeeping_test_cpu); void __init housekeeping_init(void) { - if (!tick_nohz_full_enabled()) + if (!housekeeping_flags) return; - if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) { - WARN(1, "NO_HZ: Can't allocate not-full dynticks cpumask\n"); - cpumask_clear(tick_nohz_full_mask); - tick_nohz_full_running = false; - return; + static_branch_enable(&housekeeping_overriden); + + /* We need at least one CPU to handle housekeeping work */ + WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); +} + +#ifdef CONFIG_NO_HZ_FULL +static int __init housekeeping_nohz_full_setup(char *str) +{ + cpumask_var_t non_housekeeping_mask; + + alloc_bootmem_cpumask_var(&non_housekeeping_mask); + if (cpulist_parse(str, non_housekeeping_mask) < 0) { + pr_warn("Housekeeping: Incorrect nohz_full cpumask\n"); + free_bootmem_cpumask_var(non_housekeeping_mask); + return 0; } - cpumask_andnot(housekeeping_mask, - cpu_possible_mask, tick_nohz_full_mask); + alloc_bootmem_cpumask_var(&housekeeping_mask); + cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask); - housekeeping_flags = HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC; + if (cpumask_empty(housekeeping_mask)) + cpumask_set_cpu(smp_processor_id(), housekeeping_mask); - static_branch_enable(&housekeeping_overriden); + housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER | + HK_FLAG_RCU | HK_FLAG_MISC; - /* We need at least one CPU to handle housekeeping work */ - WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); + tick_nohz_full_setup(non_housekeeping_mask); + + free_bootmem_cpumask_var(non_housekeeping_mask); + + return 1; } +__setup("nohz_full=", housekeeping_nohz_full_setup); +#endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 27d7d522ac4e..69f3dbe38984 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -385,20 +385,13 @@ out: local_irq_restore(flags); } -/* Parse the boot-time nohz CPU list from the kernel parameters. */ -static int __init tick_nohz_full_setup(char *str) +/* Get the boot-time nohz CPU list from the kernel parameters. */ +void __init tick_nohz_full_setup(cpumask_var_t cpumask) { alloc_bootmem_cpumask_var(&tick_nohz_full_mask); - if (cpulist_parse(str, tick_nohz_full_mask) < 0) { - pr_warn("NO_HZ: Incorrect nohz_full cpumask\n"); - free_bootmem_cpumask_var(tick_nohz_full_mask); - return 1; - } + cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; - - return 1; } -__setup("nohz_full=", tick_nohz_full_setup); static int tick_nohz_cpu_down(unsigned int cpu) { -- cgit v1.2.3 From edb9382175c3ebdced8ffdb3e0f20052ad9fdbe9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:37 +0200 Subject: sched/isolation: Move isolcpus= handling to the housekeeping code We want to centralize the isolation features, to be done by the housekeeping subsystem and scheduler domain isolation is a significant part of it. No intended behaviour change, we just reuse the housekeeping cpumask and core code. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-11-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- drivers/base/cpu.c | 11 ++++++- include/linux/sched.h | 2 -- include/linux/sched/isolation.h | 1 + kernel/cgroup/cpuset.c | 15 ++++------ kernel/sched/core.c | 16 +---------- kernel/sched/isolation.c | 63 ++++++++++++++++++++++++++++++++--------- kernel/sched/topology.c | 24 ++++------------ 7 files changed, 73 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 321cd7b4d817..a73ab95558f5 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "base.h" @@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { int n = 0, len = PAGE_SIZE-2; + cpumask_var_t isolated; - n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map)); + if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) + return -ENOMEM; + + cpumask_andnot(isolated, cpu_possible_mask, + housekeeping_cpumask(HK_FLAG_DOMAIN)); + n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated)); + + free_cpumask_var(isolated); return n; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 0f897dfc195e..1b0cc0d6df8d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -165,8 +165,6 @@ struct task_group; /* Task command name length: */ #define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index e53cfa96e91e..d849431c8060 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -11,6 +11,7 @@ enum hk_flags { HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), HK_FLAG_TICK = (1 << 4), + HK_FLAG_DOMAIN = (1 << 5), }; #ifdef CONFIG_CPU_ISOLATION diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4657e2924ecb..f7efa7b4d825 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -57,7 +57,7 @@ #include #include #include - +#include #include #include #include @@ -656,7 +656,6 @@ static int generate_sched_domains(cpumask_var_t **domains, int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ - cpumask_var_t non_isolated_cpus; /* load balanced CPUs */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ @@ -666,10 +665,6 @@ static int generate_sched_domains(cpumask_var_t **domains, dattr = NULL; csa = NULL; - if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL)) - goto done; - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { ndoms = 1; @@ -683,7 +678,7 @@ static int generate_sched_domains(cpumask_var_t **domains, update_domain_attr_tree(dattr, &top_cpuset); } cpumask_and(doms[0], top_cpuset.effective_cpus, - non_isolated_cpus); + housekeeping_cpumask(HK_FLAG_DOMAIN)); goto done; } @@ -707,7 +702,8 @@ static int generate_sched_domains(cpumask_var_t **domains, */ if (!cpumask_empty(cp->cpus_allowed) && !(is_sched_load_balance(cp) && - cpumask_intersects(cp->cpus_allowed, non_isolated_cpus))) + cpumask_intersects(cp->cpus_allowed, + housekeeping_cpumask(HK_FLAG_DOMAIN)))) continue; if (is_sched_load_balance(cp)) @@ -789,7 +785,7 @@ restart: if (apn == b->pn) { cpumask_or(dp, dp, b->effective_cpus); - cpumask_and(dp, dp, non_isolated_cpus); + cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN)); if (dattr) update_domain_attr_tree(dattr + nslot, b); @@ -802,7 +798,6 @@ restart: BUG_ON(nslot != ndoms); done: - free_cpumask_var(non_isolated_cpus); kfree(csa); /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2210c0203e51..1a55c842bfbc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -84,9 +84,6 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; -/* CPUs with isolated domains */ -cpumask_var_t cpu_isolated_map; - /* * __task_rq_lock - lock the rq @p resides on. */ @@ -5735,10 +5732,6 @@ static inline void sched_init_smt(void) { } void __init sched_init_smp(void) { - cpumask_var_t non_isolated_cpus; - - alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); - sched_init_numa(); /* @@ -5748,16 +5741,12 @@ void __init sched_init_smp(void) */ mutex_lock(&sched_domains_mutex); sched_init_domains(cpu_active_mask); - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - if (cpumask_empty(non_isolated_cpus)) - cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) + if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) BUG(); sched_init_granularity(); - free_cpumask_var(non_isolated_cpus); init_sched_rt_class(); init_sched_dl_class(); @@ -5961,9 +5950,6 @@ void __init sched_init(void) calc_load_update = jiffies + LOAD_FREQ; #ifdef CONFIG_SMP - /* May be allocated at isolcpus cmdline parse time */ - if (cpu_isolated_map == NULL) - zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); idle_thread_set_boot_cpu(); set_cpu_rq_start_time(smp_processor_id()); #endif diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 1f61e440358d..8f666bc5abe8 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -63,32 +63,69 @@ void __init housekeeping_init(void) WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } -#ifdef CONFIG_NO_HZ_FULL -static int __init housekeeping_nohz_full_setup(char *str) +static int __init housekeeping_setup(char *str, enum hk_flags flags) { cpumask_var_t non_housekeeping_mask; + int err; alloc_bootmem_cpumask_var(&non_housekeeping_mask); - if (cpulist_parse(str, non_housekeeping_mask) < 0) { - pr_warn("Housekeeping: Incorrect nohz_full cpumask\n"); + err = cpulist_parse(str, non_housekeeping_mask); + if (err < 0 || cpumask_last(non_housekeeping_mask) >= nr_cpu_ids) { + pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n"); free_bootmem_cpumask_var(non_housekeeping_mask); return 0; } - alloc_bootmem_cpumask_var(&housekeeping_mask); - cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask); - - if (cpumask_empty(housekeeping_mask)) - cpumask_set_cpu(smp_processor_id(), housekeeping_mask); + if (!housekeeping_flags) { + alloc_bootmem_cpumask_var(&housekeeping_mask); + cpumask_andnot(housekeeping_mask, + cpu_possible_mask, non_housekeeping_mask); + if (cpumask_empty(housekeeping_mask)) + cpumask_set_cpu(smp_processor_id(), housekeeping_mask); + } else { + cpumask_var_t tmp; + + alloc_bootmem_cpumask_var(&tmp); + cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask); + if (!cpumask_equal(tmp, housekeeping_mask)) { + pr_warn("Housekeeping: nohz_full= must match isolcpus=\n"); + free_bootmem_cpumask_var(tmp); + free_bootmem_cpumask_var(non_housekeeping_mask); + return 0; + } + free_bootmem_cpumask_var(tmp); + } - housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER | - HK_FLAG_RCU | HK_FLAG_MISC; + if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) { + if (IS_ENABLED(CONFIG_NO_HZ_FULL)) { + tick_nohz_full_setup(non_housekeeping_mask); + } else { + pr_warn("Housekeeping: nohz unsupported." + " Build with CONFIG_NO_HZ_FULL\n"); + free_bootmem_cpumask_var(non_housekeeping_mask); + return 0; + } + } - tick_nohz_full_setup(non_housekeeping_mask); + housekeeping_flags |= flags; free_bootmem_cpumask_var(non_housekeeping_mask); return 1; } + +static int __init housekeeping_nohz_full_setup(char *str) +{ + unsigned int flags; + + flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC; + + return housekeeping_setup(str, flags); +} __setup("nohz_full=", housekeeping_nohz_full_setup); -#endif + +static int __init housekeeping_isolcpus_setup(char *str) +{ + return housekeeping_setup(str, HK_FLAG_DOMAIN); +} +__setup("isolcpus=", housekeeping_isolcpus_setup); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index e3d31b0880dc..2e6b9126ffdc 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -3,6 +3,7 @@ */ #include #include +#include #include "sched.h" @@ -469,21 +470,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) update_top_cache_domain(cpu); } -/* Setup the mask of CPUs configured for isolated domains */ -static int __init isolated_cpu_setup(char *str) -{ - int ret; - - alloc_bootmem_cpumask_var(&cpu_isolated_map); - ret = cpulist_parse(str, cpu_isolated_map); - if (ret || cpumask_last(cpu_isolated_map) >= nr_cpu_ids) { - pr_err("sched: Error, all isolcpus= values must be between 0 and %u - ignoring them.\n", nr_cpu_ids-1); - return 0; - } - return 1; -} -__setup("isolcpus=", isolated_cpu_setup); - struct s_data { struct sched_domain ** __percpu sd; struct root_domain *rd; @@ -1792,7 +1778,7 @@ int sched_init_domains(const struct cpumask *cpu_map) doms_cur = alloc_sched_domains(ndoms_cur); if (!doms_cur) doms_cur = &fallback_doms; - cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); + cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN)); err = build_sched_domains(doms_cur[0], NULL); register_sched_domain_sysctl(); @@ -1875,7 +1861,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], doms_new = alloc_sched_domains(1); if (doms_new) { n = 1; - cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); + cpumask_and(doms_new[0], cpu_active_mask, + housekeeping_cpumask(HK_FLAG_DOMAIN)); } } else { n = ndoms_new; @@ -1898,7 +1885,8 @@ match1: if (!doms_new) { n = 0; doms_new = &fallback_doms; - cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); + cpumask_and(doms_new[0], cpu_active_mask, + housekeeping_cpumask(HK_FLAG_DOMAIN)); } /* Build new domains: */ -- cgit v1.2.3 From a9903f04e0a4ea522d959c2f287cdf0ab029e324 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 30 Oct 2017 11:08:16 -0700 Subject: sched/sysctl: Fix attributes of some extern declarations The definition of sysctl_sched_migration_cost, sysctl_sched_nr_migrate and sysctl_sched_time_avg includes the attribute const_debug. This attribute is not part of the extern declaration of these variables in include/linux/sched/sysctl.h, while it is in kernel/sched/sched.h, and as a result Clang generates warnings like this: kernel/sched/sched.h:1618:33: warning: section attribute is specified on redeclared variable [-Wsection] extern const_debug unsigned int sysctl_sched_time_avg; ^ ./include/linux/sched/sysctl.h:42:21: note: previous declaration is here extern unsigned int sysctl_sched_time_avg; The header only declares the variables when CONFIG_SCHED_DEBUG is defined, therefore it is not necessary to duplicate the definition of const_debug. Instead we can use the attribute __read_mostly, which is the expansion of const_debug when CONFIG_SCHED_DEBUG=y is set. Signed-off-by: Matthias Kaehlcke Reviewed-by: Nick Desaulniers Cc: Douglas Anderson Cc: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shile Zhang Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171030180816.170850-1-mka@chromium.org Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 0f5ecd4d298e..d34c823f3d36 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -37,9 +37,9 @@ extern unsigned int sysctl_numa_balancing_scan_period_max; extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; +extern __read_mostly unsigned int sysctl_sched_migration_cost; +extern __read_mostly unsigned int sysctl_sched_nr_migrate; +extern __read_mostly unsigned int sysctl_sched_time_avg; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, -- cgit v1.2.3