From c004d231cac709f09987f2a6dd733013039d27c3 Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Wed, 19 Oct 2022 08:36:50 -0400 Subject: rcu: Use hlist_nulls_next_rcu() in hlist_nulls_add_tail_rcu() In commit 8dbd76e79a16 ("tcp/dccp: fix possible race __inet_lookup_established()"), function hlist_nulls_add_tail_rcu() was added back, but the local variable *last* is of type hlist_nulls_node, so use hlist_nulls_next_rcu() instead of hlist_next_rcu(). Signed-off-by: Zhao Mengmeng Signed-off-by: Paul E. McKenney --- include/linux/rculist_nulls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index d8afdb8784c1..ba4c00dd8005 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -139,7 +139,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, if (last) { n->next = last->next; n->pprev = &last->next; - rcu_assign_pointer(hlist_next_rcu(last), n); + rcu_assign_pointer(hlist_nulls_next_rcu(last), n); } else { hlist_nulls_add_head_rcu(n, h); } -- cgit v1.2.3 From 0cae5ded535c3a80aed94f119bbd4ee3ae284a65 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Dec 2022 11:41:44 -0800 Subject: rcu: Make RCU_LOCKDEP_WARN() avoid early lockdep checks Currently, RCU_LOCKDEP_WARN() checks the condition before checking to see if lockdep is still enabled. This is necessary to avoid the false-positive splats fixed by commit 3066820034b5dd ("rcu: Reject RCU_LOCKDEP_WARN() false positives"). However, the current state can result in false-positive splats during early boot before lockdep is fully initialized. This commit therefore checks debug_lockdep_rcu_enabled() both before and after checking the condition, thus avoiding both sets of false-positive error reports. Reported-by: Steven Rostedt Reported-by: Masami Hiramatsu (Google) Reported-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney Reviewed-by: Mathieu Desnoyers Cc: Boqun Feng Cc: Matthew Wilcox Cc: Thomas Gleixner --- include/linux/rcupdate.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03abf883a281..aa86de01aab6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -374,11 +374,18 @@ static inline int debug_lockdep_rcu_enabled(void) * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check * @s: informative message + * + * This checks debug_lockdep_rcu_enabled() before checking (c) to + * prevent early boot splats due to lockdep not yet being initialized, + * and rechecks it after checking (c) to prevent false-positive splats + * due to races with lockdep being disabled. See commit 3066820034b5dd + * ("rcu: Reject RCU_LOCKDEP_WARN() false positives") for more detail. */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ static bool __section(".data.unlikely") __warned; \ - if ((c) && debug_lockdep_rcu_enabled() && !__warned) { \ + if (debug_lockdep_rcu_enabled() && (c) && \ + debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ -- cgit v1.2.3 From 04a522b7da3dbc083f8ae0aa1a6184b959a8f81c Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 25 Oct 2022 16:46:12 +0200 Subject: rcu: Refactor kvfree_call_rcu() and high-level helpers Currently a kvfree_call_rcu() takes an offset within a structure as a second parameter, so a helper such as a kvfree_rcu_arg_2() has to convert rcu_head and a freed ptr to an offset in order to pass it. That leads to an extra conversion on macro entry. Instead of converting, refactor the code in way that a pointer that has to be freed is passed directly to the kvfree_call_rcu(). This patch does not make any functional change and is transparent to all kvfree_rcu() users. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 5 ++--- include/linux/rcutiny.h | 12 ++++++------ include/linux/rcutree.h | 2 +- kernel/rcu/tiny.c | 9 +++------ kernel/rcu/tree.c | 29 ++++++++++++----------------- 5 files changed, 24 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03abf883a281..f38d4469d7f3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1011,8 +1011,7 @@ do { \ \ if (___p) { \ BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \ - kvfree_call_rcu(&((___p)->rhf), (rcu_callback_t)(unsigned long) \ - (offsetof(typeof(*(ptr)), rhf))); \ + kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ } \ } while (0) @@ -1021,7 +1020,7 @@ do { \ typeof(ptr) ___p = (ptr); \ \ if (___p) \ - kvfree_call_rcu(NULL, (rcu_callback_t) (___p)); \ + kvfree_call_rcu(NULL, (void *) (___p)); \ } while (0) /* diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 68f9070aa111..7f17acf29dda 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -98,25 +98,25 @@ static inline void synchronize_rcu_expedited(void) */ extern void kvfree(const void *addr); -static inline void __kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr) { if (head) { - call_rcu(head, func); + call_rcu(head, (rcu_callback_t) ((void *) head - ptr)); return; } // kvfree_rcu(one_arg) call. might_sleep(); synchronize_rcu(); - kvfree((void *) func); + kvfree(ptr); } #ifdef CONFIG_KASAN_GENERIC -void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +void kvfree_call_rcu(struct rcu_head *head, void *ptr); #else -static inline void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr) { - __kvfree_call_rcu(head, func); + __kvfree_call_rcu(head, ptr); } #endif diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 4003bf6cfa1c..56bccb5a8fde 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -33,7 +33,7 @@ static inline void rcu_virt_note_context_switch(void) } void synchronize_rcu_expedited(void); -void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func); +void kvfree_call_rcu(struct rcu_head *head, void *ptr); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 72913ce21258..42f7589e51e0 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -246,15 +246,12 @@ bool poll_state_synchronize_rcu(unsigned long oldstate) EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu); #ifdef CONFIG_KASAN_GENERIC -void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +void kvfree_call_rcu(struct rcu_head *head, void *ptr) { - if (head) { - void *ptr = (void *) head - (unsigned long) func; - + if (head) kasan_record_aux_stack_noalloc(ptr); - } - __kvfree_call_rcu(head, func); + __kvfree_call_rcu(head, ptr); } EXPORT_SYMBOL_GPL(kvfree_call_rcu); #endif diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cf34a961821a..7d222acd85bf 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3103,8 +3103,8 @@ static void kfree_rcu_work(struct work_struct *work) * This list is named "Channel 3". */ for (; head; head = next) { - unsigned long offset = (unsigned long)head->func; - void *ptr = (void *)head - offset; + void *ptr = (void *) head->func; + unsigned long offset = (void *) head - ptr; next = head->next; debug_rcu_head_unqueue((struct rcu_head *)ptr); @@ -3342,26 +3342,21 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp, * be free'd in workqueue context. This allows us to: batch requests together to * reduce the number of grace periods during heavy kfree_rcu()/kvfree_rcu() load. */ -void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) +void kvfree_call_rcu(struct rcu_head *head, void *ptr) { unsigned long flags; struct kfree_rcu_cpu *krcp; bool success; - void *ptr; - if (head) { - ptr = (void *) head - (unsigned long) func; - } else { - /* - * Please note there is a limitation for the head-less - * variant, that is why there is a clear rule for such - * objects: it can be used from might_sleep() context - * only. For other places please embed an rcu_head to - * your data. - */ + /* + * Please note there is a limitation for the head-less + * variant, that is why there is a clear rule for such + * objects: it can be used from might_sleep() context + * only. For other places please embed an rcu_head to + * your data. + */ + if (!head) might_sleep(); - ptr = (unsigned long *) func; - } // Queue the object but don't yet schedule the batch. if (debug_rcu_head_queue(ptr)) { @@ -3382,7 +3377,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) // Inline if kvfree_rcu(one_arg) call. goto unlock_return; - head->func = func; + head->func = ptr; head->next = krcp->head; krcp->head = head; success = true; -- cgit v1.2.3 From aa5210f524edcebf909e6576b515bc3e2d82af0d Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Wed, 16 Nov 2022 09:52:43 +0800 Subject: srcu: Fix a misspelling in comment s/srcu_gq_seq/srcu_gp_seq/ Signed-off-by: Pingfan Liu Cc: Lai Jiangshan Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Reviewed-by: Mukesh Ojha Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index c689a81752c9..558057b517b7 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -49,7 +49,7 @@ struct srcu_data { struct srcu_node { spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */ - /* if greater than ->srcu_gq_seq. */ + /* if greater than ->srcu_gp_seq. */ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ struct srcu_node *srcu_parent; /* Next up in tree. */ -- cgit v1.2.3 From 0b1182bde303dc476ea9712c2c816be2e4f0cf81 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 23 Nov 2022 15:49:55 -0800 Subject: rcu: Add srcu_down_read() and srcu_up_read() A pair of matching srcu_read_lock() and srcu_read_unlock() invocations must take place within the same context, for example, within the same task. Otherwise, lockdep complains, as is the right thing to do for most use cases. However, there are use cases involving asynchronous I/O where the SRCU reader needs to begin on one task and end on another. This commit therefore supplies the semaphore-like srcu_down_read() and srcu_up_read(), which act like srcu_read_lock() and srcu_read_unlock(), but permitting srcu_up_read() to be invoked in a different context than was the matching srcu_down_read(). Neither srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. Reported-by: Jan Kara Signed-off-by: Paul E. McKenney Tested-by: Amir Goldstein --- include/linux/srcu.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9b9d0bbf1d3c..74796cd7e7a9 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -214,6 +214,34 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) return retval; } +/** + * srcu_down_read - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter a semaphore-like SRCU read-side critical section. Note that + * SRCU read-side critical sections may be nested. However, it is + * illegal to call anything that waits on an SRCU grace period for the + * same srcu_struct, whether directly or indirectly. Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). But if you want lockdep to help you + * keep this stuff straight, you should instead use srcu_read_lock(). + * + * The semaphore-like nature of srcu_down_read() means that the matching + * srcu_up_read() can be invoked from some other context, for example, + * from some other task or from an irq handler. However, neither + * srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. + * + * Calls to srcu_down_read() may be nested, similar to the manner in + * which calls to down_read() may be nested. + */ +static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) +{ + WARN_ON_ONCE(in_nmi()); + srcu_check_nmi_safety(ssp, false); + return __srcu_read_lock(ssp); +} + /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. @@ -254,6 +282,23 @@ srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) __srcu_read_unlock(ssp, idx); } +/** + * srcu_up_read - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Exit an SRCU read-side critical section, but not necessarily from + * the same context as the maching srcu_down_read(). + */ +static inline void srcu_up_read(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + WARN_ON_ONCE(in_nmi()); + srcu_check_nmi_safety(ssp, false); + __srcu_read_unlock(ssp, idx); +} + /** * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock * -- cgit v1.2.3 From 47904aed898a08f028572b9b5a5cc101ddfb2d82 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 19 Nov 2022 17:25:03 +0800 Subject: genirq: Fix the return type of kstat_cpu_irqs_sum() The type of member ->irqs_sum is unsigned long, but kstat_cpu_irqs_sum() returns int, which can result in truncation. Therefore, change the kstat_cpu_irqs_sum() function's return value to unsigned long to avoid truncation. Fixes: f2c66cd8eedd ("/proc/stat: scalability of irq num per cpu") Reported-by: Elliott, Robert (Servers) Signed-off-by: Zhen Lei Cc: Tejun Heo Cc: "Peter Zijlstra (Intel)" Cc: Josh Don Cc: Andrew Morton Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ddb5a358fd82..90e2fdc17d79 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -75,7 +75,7 @@ extern unsigned int kstat_irqs_usr(unsigned int irq); /* * Number of interrupts per cpu, since bootup */ -static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) +static inline unsigned long kstat_cpu_irqs_sum(unsigned int cpu) { return kstat_cpu(cpu).irqs_sum; } -- cgit v1.2.3 From 28319d6dc5e2ffefa452c2377dd0f71621b5bff0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 25 Nov 2022 14:55:00 +0100 Subject: rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes() RCU Tasks and PID-namespace unshare can interact in do_exit() in a complicated circular dependency: 1) TASK A calls unshare(CLONE_NEWPID), this creates a new PID namespace that every subsequent child of TASK A will belong to. But TASK A doesn't itself belong to that new PID namespace. 2) TASK A forks() and creates TASK B. TASK A stays attached to its PID namespace (let's say PID_NS1) and TASK B is the first task belonging to the new PID namespace created by unshare() (let's call it PID_NS2). 3) Since TASK B is the first task attached to PID_NS2, it becomes the PID_NS2 child reaper. 4) TASK A forks() again and creates TASK C which get attached to PID_NS2. Note how TASK C has TASK A as a parent (belonging to PID_NS1) but has TASK B (belonging to PID_NS2) as a pid_namespace child_reaper. 5) TASK B exits and since it is the child reaper for PID_NS2, it has to kill all other tasks attached to PID_NS2, and wait for all of them to die before getting reaped itself (zap_pid_ns_process()). 6) TASK A calls synchronize_rcu_tasks() which leads to synchronize_srcu(&tasks_rcu_exit_srcu). 7) TASK B is waiting for TASK C to get reaped. But TASK B is under a tasks_rcu_exit_srcu SRCU critical section (exit_notify() is between exit_tasks_rcu_start() and exit_tasks_rcu_finish()), blocking TASK A. 8) TASK C exits and since TASK A is its parent, it waits for it to reap TASK C, but it can't because TASK A waits for TASK B that waits for TASK C. Pid_namespace semantics can hardly be changed at this point. But the coverage of tasks_rcu_exit_srcu can be reduced instead. The current task is assumed not to be concurrently reapable at this stage of exit_notify() and therefore tasks_rcu_exit_srcu can be temporarily relaxed without breaking its constraints, providing a way out of the deadlock scenario. [ paulmck: Fix build failure by adding additional declaration. ] Fixes: 3f95aa81d265 ("rcu: Make TASKS_RCU handle tasks that are almost done exiting") Reported-by: Pengfei Xu Suggested-by: Boqun Feng Suggested-by: Neeraj Upadhyay Suggested-by: Paul E. McKenney Cc: Oleg Nesterov Cc: Lai Jiangshan Cc: Eric W . Biederman Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 ++ kernel/pid_namespace.c | 17 +++++++++++++++++ kernel/rcu/tasks.h | 15 +++++++++++++-- 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 03abf883a281..c0c79beac3fe 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -238,6 +238,7 @@ void synchronize_rcu_tasks_rude(void); #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); +void exit_tasks_rcu_stop(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_classic_qs(t, preempt) do { } while (0) @@ -246,6 +247,7 @@ void exit_tasks_rcu_finish(void); #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } +static inline void exit_tasks_rcu_stop(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index f4f8cb0435b4..fc21c5d5fd5d 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -244,7 +244,24 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) set_current_state(TASK_INTERRUPTIBLE); if (pid_ns->pid_allocated == init_pids) break; + /* + * Release tasks_rcu_exit_srcu to avoid following deadlock: + * + * 1) TASK A unshare(CLONE_NEWPID) + * 2) TASK A fork() twice -> TASK B (child reaper for new ns) + * and TASK C + * 3) TASK B exits, kills TASK C, waits for TASK A to reap it + * 4) TASK A calls synchronize_rcu_tasks() + * -> synchronize_srcu(tasks_rcu_exit_srcu) + * 5) *DEADLOCK* + * + * It is considered safe to release tasks_rcu_exit_srcu here + * because we assume the current task can not be concurrently + * reaped at this point. + */ + exit_tasks_rcu_stop(); schedule(); + exit_tasks_rcu_start(); } __set_current_state(TASK_RUNNING); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index fbaed2637a7f..5de61f12a164 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1016,16 +1016,27 @@ void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu) * task is exiting and may be removed from the tasklist. See * corresponding synchronize_srcu() for further details. */ -void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu) +void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu) { struct task_struct *t = current; __srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx); - exit_tasks_rcu_finish_trace(t); +} + +/* + * Contribute to protect against tasklist scan blind spot while the + * task is exiting and may be removed from the tasklist. See + * corresponding synchronize_srcu() for further details. + */ +void exit_tasks_rcu_finish(void) +{ + exit_tasks_rcu_stop(); + exit_tasks_rcu_finish_trace(current); } #else /* #ifdef CONFIG_TASKS_RCU */ void exit_tasks_rcu_start(void) { } +void exit_tasks_rcu_stop(void) { } void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } #endif /* #else #ifdef CONFIG_TASKS_RCU */ -- cgit v1.2.3 From 3ca0a6ea8f6de0f21a9331ff3596a9c073fbdab0 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 19 Nov 2022 17:25:04 +0800 Subject: sched: Add helper kstat_cpu_softirqs_sum() Add a kstat_cpu_softirqs_sum() function that is similar to kstat_cpu_irqs_sum(), but which counts software interrupts since boot on the specified CPU. Signed-off-by: Zhen Lei Cc: Josh Don Cc: Tejun Heo Cc: Peter Zijlstra Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/kernel_stat.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 90e2fdc17d79..898076e173a9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -67,6 +67,17 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) return kstat_cpu(cpu).softirqs[irq]; } +static inline unsigned int kstat_cpu_softirqs_sum(int cpu) +{ + int i; + unsigned int sum = 0; + + for (i = 0; i < NR_SOFTIRQS; i++) + sum += kstat_softirqs_cpu(i, cpu); + + return sum; +} + /* * Number of interrupts per specific IRQ source, since bootup */ -- cgit v1.2.3 From 7c182722a0a9447e31f9645de4f311e5bc59b480 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 19 Nov 2022 17:25:05 +0800 Subject: sched: Add helper nr_context_switches_cpu() Add a function nr_context_switches_cpu() that returns number of context switches since boot on the specified CPU. This information will be used to diagnose RCU CPU stalls. Signed-off-by: Zhen Lei Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira Cc: Valentin Schneider Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/kernel_stat.h | 1 + kernel/sched/core.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 898076e173a9..9935f7ecbfb9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -52,6 +52,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) +extern unsigned long long nr_context_switches_cpu(int cpu); extern unsigned long long nr_context_switches(void); extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b582b6ee5f..2e40a6c116e1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5282,6 +5282,11 @@ bool single_task_running(void) } EXPORT_SYMBOL(single_task_running); +unsigned long long nr_context_switches_cpu(int cpu) +{ + return cpu_rq(cpu)->nr_switches; +} + unsigned long long nr_context_switches(void) { int i; -- cgit v1.2.3 From 608723c41cd951fb32ade2f8371e61c270816175 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 1 Feb 2023 16:08:07 +0100 Subject: rcu/kvfree: Add kvfree_rcu_mightsleep() and kfree_rcu_mightsleep() The kvfree_rcu() and kfree_rcu() APIs are hazardous in that if you forget the second argument, it works, but might sleep. This sleeping can be a correctness bug from atomic contexts, and even in non-atomic contexts it might introduce unacceptable latencies. This commit therefore adds kvfree_rcu_mightsleep() and kfree_rcu_mightsleep(), which will replace the single-argument kvfree_rcu() and kfree_rcu(), respectively. This commit enables a series of commits that switch from single-argument kvfree_rcu() and kfree_rcu() to their _mightsleep() counterparts. Once all of these commits land, the single-argument versions will be removed. Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f38d4469d7f3..84433600885a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1004,6 +1004,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \ kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__) +#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) +#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr) + #define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME #define kvfree_rcu_arg_2(ptr, rhf) \ do { \ -- cgit v1.2.3