summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-27 02:42:43 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-27 02:42:43 +0300
commit1ae78780eda54023a0fb49ee743dbba39da148e0 (patch)
tree615313e65083cc30aca62cba3c717331f53002a6 /kernel
parent77a05940eee7e9891cd6add7a690a3e762ee21b0 (diff)
parent43e0ae7ae0f567a3f8c10ec7a4078bc482660921 (diff)
downloadlinux-1ae78780eda54023a0fb49ee743dbba39da148e0.tar.xz
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar: "The main changes in this cycle were: - Dynamic tick (nohz) updates, perhaps most notably changes to force the tick on when needed due to lengthy in-kernel execution on CPUs on which RCU is waiting. - Linux-kernel memory consistency model updates. - Replace rcu_swap_protected() with rcu_prepace_pointer(). - Torture-test updates. - Documentation updates. - Miscellaneous fixes" * 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits) security/safesetid: Replace rcu_swap_protected() with rcu_replace_pointer() net/sched: Replace rcu_swap_protected() with rcu_replace_pointer() net/netfilter: Replace rcu_swap_protected() with rcu_replace_pointer() net/core: Replace rcu_swap_protected() with rcu_replace_pointer() bpf/cgroup: Replace rcu_swap_protected() with rcu_replace_pointer() fs/afs: Replace rcu_swap_protected() with rcu_replace_pointer() drivers/scsi: Replace rcu_swap_protected() with rcu_replace_pointer() drm/i915: Replace rcu_swap_protected() with rcu_replace_pointer() x86/kvm/pmu: Replace rcu_swap_protected() with rcu_replace_pointer() rcu: Upgrade rcu_swap_protected() to rcu_replace_pointer() rcu: Suppress levelspread uninitialized messages rcu: Fix uninitialized variable in nocb_gp_wait() rcu: Update descriptions for rcu_future_grace_period tracepoint rcu: Update descriptions for rcu_nocb_wake tracepoint rcu: Remove obsolete descriptions for rcu_barrier tracepoint rcu: Ensure that ->rcu_urgent_qs is set before resched IPI workqueue: Convert for_each_wq to use built-in list check rcu: Several rcu_segcblist functions can be static rcu: Remove unused function hlist_bl_del_init_rcu() Documentation: Rename rcu_node_context_switch() to rcu_note_context_switch() ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/cgroup.c4
-rw-r--r--kernel/locking/locktorture.c9
-rw-r--r--kernel/rcu/rcu.h4
-rw-r--r--kernel/rcu/rcu_segcblist.c6
-rw-r--r--kernel/rcu/rcuperf.c16
-rw-r--r--kernel/rcu/rcutorture.c44
-rw-r--r--kernel/rcu/tree.c73
-rw-r--r--kernel/rcu/tree.h1
-rw-r--r--kernel/rcu/tree_plugin.h2
-rw-r--r--kernel/stop_machine.c1
-rw-r--r--kernel/time/tick-sched.c11
-rw-r--r--kernel/workqueue.c9
12 files changed, 113 insertions, 67 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index a3eaf08e7dd3..9f90d3c92bda 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -180,8 +180,8 @@ static void activate_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
struct bpf_prog_array *old_array)
{
- rcu_swap_protected(cgrp->bpf.effective[type], old_array,
- lockdep_is_held(&cgroup_mutex));
+ old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
+ lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
*/
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index c513031cd7e3..99475a66c94f 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -16,7 +16,6 @@
#include <linux/kthread.h>
#include <linux/sched/rt.h>
#include <linux/spinlock.h>
-#include <linux/rwlock.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/smp.h>
@@ -889,16 +888,16 @@ static int __init lock_torture_init(void)
cxt.nrealwriters_stress = 2 * num_online_cpus();
#ifdef CONFIG_DEBUG_MUTEXES
- if (strncmp(torture_type, "mutex", 5) == 0)
+ if (str_has_prefix(torture_type, "mutex"))
cxt.debug_lock = true;
#endif
#ifdef CONFIG_DEBUG_RT_MUTEXES
- if (strncmp(torture_type, "rtmutex", 7) == 0)
+ if (str_has_prefix(torture_type, "rtmutex"))
cxt.debug_lock = true;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
- if ((strncmp(torture_type, "spin", 4) == 0) ||
- (strncmp(torture_type, "rw_lock", 7) == 0))
+ if ((str_has_prefix(torture_type, "spin")) ||
+ (str_has_prefix(torture_type, "rw_lock")))
cxt.debug_lock = true;
#endif
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 8fd4f82c9b3d..ab504fbc76ca 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -299,6 +299,8 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
{
int i;
+ for (i = 0; i < RCU_NUM_LVLS; i++)
+ levelspread[i] = INT_MIN;
if (rcu_fanout_exact) {
levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
for (i = rcu_num_lvls - 2; i >= 0; i--)
@@ -455,7 +457,6 @@ enum rcutorture_type {
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
unsigned long *gp_seq);
-void rcutorture_record_progress(unsigned long vernum);
void do_trace_rcu_torture_read(const char *rcutorturename,
struct rcu_head *rhp,
unsigned long secs,
@@ -468,7 +469,6 @@ static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
*flags = 0;
*gp_seq = 0;
}
-static inline void rcutorture_record_progress(unsigned long vernum) { }
#ifdef CONFIG_RCU_TRACE
void do_trace_rcu_torture_read(const char *rcutorturename,
struct rcu_head *rhp,
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 495c58ce1640..cbc87b804db9 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -88,7 +88,7 @@ struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
}
/* Set the length of an rcu_segcblist structure. */
-void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
+static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
{
#ifdef CONFIG_RCU_NOCB_CPU
atomic_long_set(&rsclp->len, v);
@@ -104,7 +104,7 @@ void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
* This increase is fully ordered with respect to the callers accesses
* both before and after.
*/
-void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
+static void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
{
#ifdef CONFIG_RCU_NOCB_CPU
smp_mb__before_atomic(); /* Up to the caller! */
@@ -134,7 +134,7 @@ void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
* with the actual number of callbacks on the structure. This exchange is
* fully ordered with respect to the callers accesses both before and after.
*/
-long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
+static long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
{
#ifdef CONFIG_RCU_NOCB_CPU
return atomic_long_xchg(&rsclp->len, v);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 5a879d073c1c..5f884d560384 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -109,15 +109,6 @@ static unsigned long b_rcu_perf_writer_started;
static unsigned long b_rcu_perf_writer_finished;
static DEFINE_PER_CPU(atomic_t, n_async_inflight);
-static int rcu_perf_writer_state;
-#define RTWS_INIT 0
-#define RTWS_ASYNC 1
-#define RTWS_BARRIER 2
-#define RTWS_EXP_SYNC 3
-#define RTWS_SYNC 4
-#define RTWS_IDLE 5
-#define RTWS_STOPPING 6
-
#define MAX_MEAS 10000
#define MIN_MEAS 100
@@ -404,25 +395,20 @@ retry:
if (!rhp)
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
- rcu_perf_writer_state = RTWS_ASYNC;
atomic_inc(this_cpu_ptr(&n_async_inflight));
cur_ops->async(rhp, rcu_perf_async_cb);
rhp = NULL;
} else if (!kthread_should_stop()) {
- rcu_perf_writer_state = RTWS_BARRIER;
cur_ops->gp_barrier();
goto retry;
} else {
kfree(rhp); /* Because we are stopping. */
}
} else if (gp_exp) {
- rcu_perf_writer_state = RTWS_EXP_SYNC;
cur_ops->exp_sync();
} else {
- rcu_perf_writer_state = RTWS_SYNC;
cur_ops->sync();
}
- rcu_perf_writer_state = RTWS_IDLE;
t = ktime_get_mono_fast_ns();
*wdp = t - *wdp;
i_max = i;
@@ -463,10 +449,8 @@ retry:
rcu_perf_wait_shutdown();
} while (!torture_must_stop());
if (gp_async) {
- rcu_perf_writer_state = RTWS_BARRIER;
cur_ops->gp_barrier();
}
- rcu_perf_writer_state = RTWS_STOPPING;
writer_n_durations[me] = i_max;
torture_kthread_stopping("rcu_perf_writer");
return 0;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 3c9feca1eab1..dee043feb71f 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -44,6 +44,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/oom.h>
+#include <linux/tick.h>
#include "rcu.h"
@@ -1363,15 +1364,15 @@ rcu_torture_reader(void *arg)
set_user_nice(current, MAX_NICE);
if (irqreader && cur_ops->irq_capable)
timer_setup_on_stack(&t, rcu_torture_timer, 0);
-
+ tick_dep_set_task(current, TICK_DEP_BIT_RCU);
do {
if (irqreader && cur_ops->irq_capable) {
if (!timer_pending(&t))
mod_timer(&t, jiffies + 1);
}
- if (!rcu_torture_one_read(&rand))
+ if (!rcu_torture_one_read(&rand) && !torture_must_stop())
schedule_timeout_interruptible(HZ);
- if (time_after(jiffies, lastsleep)) {
+ if (time_after(jiffies, lastsleep) && !torture_must_stop()) {
schedule_timeout_interruptible(1);
lastsleep = jiffies + 10;
}
@@ -1383,6 +1384,7 @@ rcu_torture_reader(void *arg)
del_timer_sync(&t);
destroy_timer_on_stack(&t);
}
+ tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
torture_kthread_stopping("rcu_torture_reader");
return 0;
}
@@ -1442,15 +1444,18 @@ rcu_torture_stats_print(void)
n_rcu_torture_barrier_error);
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
- if (atomic_read(&n_rcu_torture_mberror) != 0 ||
- n_rcu_torture_barrier_error != 0 ||
- n_rcu_torture_boost_ktrerror != 0 ||
- n_rcu_torture_boost_rterror != 0 ||
- n_rcu_torture_boost_failure != 0 ||
+ if (atomic_read(&n_rcu_torture_mberror) ||
+ n_rcu_torture_barrier_error || n_rcu_torture_boost_ktrerror ||
+ n_rcu_torture_boost_rterror || n_rcu_torture_boost_failure ||
i > 1) {
pr_cont("%s", "!!! ");
atomic_inc(&n_rcu_torture_error);
- WARN_ON_ONCE(1);
+ WARN_ON_ONCE(atomic_read(&n_rcu_torture_mberror));
+ WARN_ON_ONCE(n_rcu_torture_barrier_error); // rcu_barrier()
+ WARN_ON_ONCE(n_rcu_torture_boost_ktrerror); // no boost kthread
+ WARN_ON_ONCE(n_rcu_torture_boost_rterror); // can't set RT prio
+ WARN_ON_ONCE(n_rcu_torture_boost_failure); // RCU boost failed
+ WARN_ON_ONCE(i > 1); // Too-short grace period
}
pr_cont("Reader Pipe: ");
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -1729,10 +1734,10 @@ static void rcu_torture_fwd_prog_cond_resched(unsigned long iter)
// Real call_rcu() floods hit userspace, so emulate that.
if (need_resched() || (iter & 0xfff))
schedule();
- } else {
- // No userspace emulation: CB invocation throttles call_rcu()
- cond_resched();
+ return;
}
+ // No userspace emulation: CB invocation throttles call_rcu()
+ cond_resched();
}
/*
@@ -1759,6 +1764,11 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void)
kfree(rfcp);
freed++;
rcu_torture_fwd_prog_cond_resched(freed);
+ if (tick_nohz_full_enabled()) {
+ local_irq_save(flags);
+ rcu_momentary_dyntick_idle();
+ local_irq_restore(flags);
+ }
}
return freed;
}
@@ -1803,7 +1813,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
udelay(10);
cur_ops->readunlock(idx);
if (!fwd_progress_need_resched || need_resched())
- rcu_torture_fwd_prog_cond_resched(1);
+ cond_resched();
}
(*tested_tries)++;
if (!time_before(jiffies, stopat) &&
@@ -1833,6 +1843,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
static void rcu_torture_fwd_prog_cr(void)
{
unsigned long cver;
+ unsigned long flags;
unsigned long gps;
int i;
long n_launders;
@@ -1865,6 +1876,7 @@ static void rcu_torture_fwd_prog_cr(void)
cver = READ_ONCE(rcu_torture_current_version);
gps = cur_ops->get_gp_seq();
rcu_launder_gp_seq_start = gps;
+ tick_dep_set_task(current, TICK_DEP_BIT_RCU);
while (time_before(jiffies, stopat) &&
!shutdown_time_arrived() &&
!READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
@@ -1891,6 +1903,11 @@ static void rcu_torture_fwd_prog_cr(void)
}
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
rcu_torture_fwd_prog_cond_resched(n_launders + n_max_cbs);
+ if (tick_nohz_full_enabled()) {
+ local_irq_save(flags);
+ rcu_momentary_dyntick_idle();
+ local_irq_restore(flags);
+ }
}
stoppedat = jiffies;
n_launders_cb_snap = READ_ONCE(n_launders_cb);
@@ -1911,6 +1928,7 @@ static void rcu_torture_fwd_prog_cr(void)
rcu_torture_fwd_cb_hist();
}
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
+ tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
WRITE_ONCE(rcu_fwd_cb_nodelay, false);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 81105141b6a8..1694a6b57ad8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -364,7 +364,7 @@ bool rcu_eqs_special_set(int cpu)
*
* The caller must have disabled interrupts and must not be idle.
*/
-static void __maybe_unused rcu_momentary_dyntick_idle(void)
+void rcu_momentary_dyntick_idle(void)
{
int special;
@@ -375,6 +375,7 @@ static void __maybe_unused rcu_momentary_dyntick_idle(void)
WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
rcu_preempt_deferred_qs(current);
}
+EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
/**
* rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
@@ -496,7 +497,7 @@ module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next
module_param(rcu_kick_kthreads, bool, 0644);
static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
-static int rcu_pending(void);
+static int rcu_pending(int user);
/*
* Return the number of RCU GPs completed thus far for debug & stats.
@@ -824,6 +825,11 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
rcu_cleanup_after_idle();
incby = 1;
+ } else if (tick_nohz_full_cpu(rdp->cpu) &&
+ rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
+ READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
+ rdp->rcu_forced_tick = true;
+ tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
}
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdp->dynticks_nmi_nesting,
@@ -885,6 +891,21 @@ void rcu_irq_enter_irqson(void)
local_irq_restore(flags);
}
+/*
+ * If any sort of urgency was applied to the current CPU (for example,
+ * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
+ * to get to a quiescent state, disable it.
+ */
+static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
+{
+ WRITE_ONCE(rdp->rcu_urgent_qs, false);
+ WRITE_ONCE(rdp->rcu_need_heavy_qs, false);
+ if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
+ tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ rdp->rcu_forced_tick = false;
+ }
+}
+
/**
* rcu_is_watching - see if RCU thinks that the current CPU is not idle
*
@@ -1073,6 +1094,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
if (tick_nohz_full_cpu(rdp->cpu) &&
time_after(jiffies,
READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) {
+ WRITE_ONCE(*ruqp, true);
resched_cpu(rdp->cpu);
WRITE_ONCE(rdp->last_fqs_resched, jiffies);
}
@@ -1968,7 +1990,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
return;
}
mask = rdp->grpmask;
- rdp->core_needs_qs = false;
if ((rnp->qsmask & mask) == 0) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
@@ -1979,6 +2000,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
if (!offloaded)
needwake = rcu_accelerate_cbs(rnp, rdp);
+ rcu_disable_urgency_upon_qs(rdp);
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
/* ^^^ Released rnp->lock */
if (needwake)
@@ -2101,6 +2123,9 @@ int rcutree_dead_cpu(unsigned int cpu)
rcu_boost_kthread_setaffinity(rnp, -1);
/* Do any needed no-CB deferred wakeups from this CPU. */
do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu));
+
+ // Stop-machine done, so allow nohz_full to disable tick.
+ tick_dep_clear(TICK_DEP_BIT_RCU);
return 0;
}
@@ -2151,6 +2176,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
rcu_nocb_unlock_irqrestore(rdp, flags);
/* Invoke callbacks. */
+ tick_dep_set_task(current, TICK_DEP_BIT_RCU);
rhp = rcu_cblist_dequeue(&rcl);
for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
debug_rcu_head_unqueue(rhp);
@@ -2217,6 +2243,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
/* Re-invoke RCU core processing if there are callbacks remaining. */
if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist))
invoke_rcu_core();
+ tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
}
/*
@@ -2241,7 +2268,7 @@ void rcu_sched_clock_irq(int user)
__this_cpu_write(rcu_data.rcu_urgent_qs, false);
}
rcu_flavor_sched_clock_irq(user);
- if (rcu_pending())
+ if (rcu_pending(user))
invoke_rcu_core();
trace_rcu_utilization(TPS("End scheduler-tick"));
@@ -2259,6 +2286,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
int cpu;
unsigned long flags;
unsigned long mask;
+ struct rcu_data *rdp;
struct rcu_node *rnp;
rcu_for_each_leaf_node(rnp) {
@@ -2283,8 +2311,11 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
for_each_leaf_node_possible_cpu(rnp, cpu) {
unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
if ((rnp->qsmask & bit) != 0) {
- if (f(per_cpu_ptr(&rcu_data, cpu)))
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (f(rdp)) {
mask |= bit;
+ rcu_disable_urgency_upon_qs(rdp);
+ }
}
}
if (mask != 0) {
@@ -2312,7 +2343,7 @@ void rcu_force_quiescent_state(void)
rnp = __this_cpu_read(rcu_data.mynode);
for (; rnp != NULL; rnp = rnp->parent) {
ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
- !raw_spin_trylock(&rnp->fqslock);
+ !raw_spin_trylock(&rnp->fqslock);
if (rnp_old != NULL)
raw_spin_unlock(&rnp_old->fqslock);
if (ret)
@@ -2786,8 +2817,9 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
* CPU-local state are performed first. However, we must check for CPU
* stalls first, else we might not get a chance.
*/
-static int rcu_pending(void)
+static int rcu_pending(int user)
{
+ bool gp_in_progress;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
@@ -2798,12 +2830,13 @@ static int rcu_pending(void)
if (rcu_nocb_need_deferred_wakeup(rdp))
return 1;
- /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
- if (rcu_nohz_full_cpu())
+ /* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */
+ if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu())
return 0;
/* Is the RCU core waiting for a quiescent state from this CPU? */
- if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
+ gp_in_progress = rcu_gp_in_progress();
+ if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress)
return 1;
/* Does this CPU have callbacks ready to invoke? */
@@ -2811,8 +2844,7 @@ static int rcu_pending(void)
return 1;
/* Has RCU gone idle with this CPU needing another grace period? */
- if (!rcu_gp_in_progress() &&
- rcu_segcblist_is_enabled(&rdp->cblist) &&
+ if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) ||
!rcu_segcblist_is_offloaded(&rdp->cblist)) &&
!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
@@ -2845,7 +2877,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
{
if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
rcu_barrier_trace(TPS("LastCB"), -1,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
complete(&rcu_state.barrier_completion);
} else {
rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence);
@@ -2869,7 +2901,7 @@ static void rcu_barrier_func(void *unused)
} else {
debug_rcu_head_unqueue(&rdp->barrier_head);
rcu_barrier_trace(TPS("IRQNQ"), -1,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
}
rcu_nocb_unlock(rdp);
}
@@ -2896,7 +2928,7 @@ void rcu_barrier(void)
/* Did someone else do our work for us? */
if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
rcu_barrier_trace(TPS("EarlyExit"), -1,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
smp_mb(); /* caller's subsequent code after above check. */
mutex_unlock(&rcu_state.barrier_mutex);
return;
@@ -2928,11 +2960,11 @@ void rcu_barrier(void)
continue;
if (rcu_segcblist_n_cbs(&rdp->cblist)) {
rcu_barrier_trace(TPS("OnlineQ"), cpu,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
smp_call_function_single(cpu, rcu_barrier_func, NULL, 1);
} else {
rcu_barrier_trace(TPS("OnlineNQ"), cpu,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
}
}
put_online_cpus();
@@ -3083,6 +3115,9 @@ int rcutree_online_cpu(unsigned int cpu)
return 0; /* Too early in boot for scheduler work. */
sync_sched_exp_online_cleanup(cpu);
rcutree_affinity_setting(cpu, -1);
+
+ // Stop-machine done, so allow nohz_full to disable tick.
+ tick_dep_clear(TICK_DEP_BIT_RCU);
return 0;
}
@@ -3103,6 +3138,9 @@ int rcutree_offline_cpu(unsigned int cpu)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcutree_affinity_setting(cpu, cpu);
+
+ // nohz_full CPUs need the tick for stop-machine to work quickly
+ tick_dep_set(TICK_DEP_BIT_RCU);
return 0;
}
@@ -3148,6 +3186,7 @@ void rcu_cpu_starting(unsigned int cpu)
rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
+ rcu_disable_urgency_upon_qs(rdp);
/* Report QS -after- changing ->qsmaskinitnext! */
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
} else {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index c612f306fe89..055c31781d3a 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -181,6 +181,7 @@ struct rcu_data {
atomic_t dynticks; /* Even value for idle, else odd. */
bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */
bool rcu_urgent_qs; /* GP old need light quiescent state. */
+ bool rcu_forced_tick; /* Forced tick to provide QS. */
#ifdef CONFIG_RCU_FAST_NO_HZ
bool all_lazy; /* All CPU's CBs lazy at idle start? */
unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 2defc7fe74c3..fa08d55f7040 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1946,7 +1946,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
int __maybe_unused cpu = my_rdp->cpu;
unsigned long cur_gp_seq;
unsigned long flags;
- bool gotcbs;
+ bool gotcbs = false;
unsigned long j = jiffies;
bool needwait_gp = false; // This prevents actual uninitialized use.
bool needwake;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 998d50ee2d9b..1fe34a9fabc2 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -235,6 +235,7 @@ static int multi_cpu_stop(void *data)
*/
touch_nmi_watchdog();
}
+ rcu_momentary_dyntick_idle();
} while (curstate != MULTI_STOP_EXIT);
local_irq_restore(flags);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c2748232f607..8b192e67aabc 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -172,6 +172,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
bool tick_nohz_full_running;
+EXPORT_SYMBOL_GPL(tick_nohz_full_running);
static atomic_t tick_dep_mask;
static bool check_tick_dependency(atomic_t *dep)
@@ -198,6 +199,11 @@ static bool check_tick_dependency(atomic_t *dep)
return true;
}
+ if (val & TICK_DEP_MASK_RCU) {
+ trace_tick_stop(0, TICK_DEP_MASK_RCU);
+ return true;
+ }
+
return false;
}
@@ -324,6 +330,7 @@ void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
preempt_enable();
}
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
@@ -331,6 +338,7 @@ void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
atomic_andnot(BIT(bit), &ts->tick_dep_mask);
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
/*
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
@@ -344,11 +352,13 @@ void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
*/
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
}
+EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
/*
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
@@ -397,6 +407,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
cpumask_copy(tick_nohz_full_mask, cpumask);
tick_nohz_full_running = true;
}
+EXPORT_SYMBOL_GPL(tick_nohz_full_setup);
static int tick_nohz_cpu_down(unsigned int cpu)
{
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 914b845ad4ff..bc88fd939f4e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -365,11 +365,6 @@ static void show_pwq(struct pool_workqueue *pwq);
!lockdep_is_held(&wq_pool_mutex), \
"RCU or wq_pool_mutex should be held")
-#define assert_rcu_or_wq_mutex(wq) \
- RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
- !lockdep_is_held(&wq->mutex), \
- "RCU or wq->mutex should be held")
-
#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
!lockdep_is_held(&wq->mutex) && \
@@ -427,9 +422,7 @@ static void show_pwq(struct pool_workqueue *pwq);
*/
#define for_each_pwq(pwq, wq) \
list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
- lockdep_is_held(&wq->mutex)) \
- if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
- else
+ lockdep_is_held(&(wq->mutex)))
#ifdef CONFIG_DEBUG_OBJECTS_WORK