author     Peter Zijlstra <peterz@infradead.org>  2022-09-29 13:20:50 +0300
committer  Peter Zijlstra <peterz@infradead.org>  2022-09-29 13:20:50 +0300
commit     a1ebcd59430236b336428bbf8e1da16fb87d56e4 (patch)
tree       126b999f1eae9b7ecf1045eec425a6dbcdcb4351 /kernel
parent     cce6a2d7e0e494c453ad73e1e78bd50684f20cca (diff)
parent     f76349cf41451c5c42a99f18a9163377e4b364ff (diff)
download   linux-a1ebcd59430236b336428bbf8e1da16fb87d56e4.tar.xz
Merge branch 'v6.0-rc7'
Merge upstream to get RAPTORLAKE_S

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit_fsnotify.c                 1
-rw-r--r--  kernel/auditsc.c                        4
-rw-r--r--  kernel/bpf/cgroup.c                     4
-rw-r--r--  kernel/bpf/core.c                       2
-rw-r--r--  kernel/bpf/syscall.c                    2
-rw-r--r--  kernel/bpf/verifier.c                  13
-rw-r--r--  kernel/cgroup/cgroup-v1.c               2
-rw-r--r--  kernel/cgroup/cgroup.c                 85
-rw-r--r--  kernel/cgroup/cpuset.c                  3
-rw-r--r--  kernel/crash_core.c                     1
-rw-r--r--  kernel/dma/debug.c                      6
-rw-r--r--  kernel/dma/mapping.c                    3
-rw-r--r--  kernel/dma/swiotlb.c                   13
-rw-r--r--  kernel/fork.c                           6
-rw-r--r--  kernel/kprobes.c                       10
-rw-r--r--  kernel/module/main.c                    2
-rw-r--r--  kernel/nsproxy.c                        3
-rw-r--r--  kernel/sched/debug.c                    2
-rw-r--r--  kernel/sched/psi.c                     10
-rw-r--r--  kernel/sched/wait_bit.c                 2
-rw-r--r--  kernel/sys_ni.c                         1
-rw-r--r--  kernel/trace/ftrace.c                  79
-rw-r--r--  kernel/trace/rv/monitors/wip/wip.h      2
-rw-r--r--  kernel/trace/rv/monitors/wwnr/wwnr.h    2
-rw-r--r--  kernel/trace/rv/reactor_panic.c         4
-rw-r--r--  kernel/trace/rv/reactor_printk.c        4
-rw-r--r--  kernel/trace/trace_events_trigger.c     3
-rw-r--r--  kernel/trace/trace_preemptirq.c         4
-rw-r--r--  kernel/tracepoint.c                     5
-rw-r--r--  kernel/workqueue.c                      6
30 files changed, 152 insertions, 132 deletions
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 6432a37ac1c9..c565fbf66ac8 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
if (ret < 0) {
+ audit_mark->path = NULL;
fsnotify_put_mark(&audit_mark->mark);
audit_mark = ERR_PTR(ret);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index dd8d9ab747c3..79a5da1bc5bb 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1940,6 +1940,7 @@ void __audit_uring_exit(int success, long code)
goto out;
}
+ audit_return_fixup(ctx, success, code);
if (ctx->context == AUDIT_CTX_SYSCALL) {
/*
* NOTE: See the note in __audit_uring_entry() about the case
@@ -1981,7 +1982,6 @@ void __audit_uring_exit(int success, long code)
audit_filter_inodes(current, ctx);
if (ctx->current_state != AUDIT_STATE_RECORD)
goto out;
- audit_return_fixup(ctx, success, code);
audit_log_exit();
out:
@@ -2065,13 +2065,13 @@ void __audit_syscall_exit(int success, long return_code)
if (!list_empty(&context->killed_trees))
audit_kill_trees(context);
+ audit_return_fixup(context, success, return_code);
/* run through both filters to ensure we set the filterkey properly */
audit_filter_syscall(current, context);
audit_filter_inodes(current, context);
if (context->current_state < AUDIT_STATE_RECORD)
goto out;
- audit_return_fixup(context, success, return_code);
audit_log_exit();
out:
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 59b7eb60d5b4..4a400cd63731 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -921,8 +921,10 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
pos++;
}
}
+
+ /* no link or prog match, skip the cgroup of this layer */
+ continue;
found:
- BUG_ON(!cg);
progs = rcu_dereference_protected(
desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1e10d088dbb..3d9eb3ae334c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -971,7 +971,7 @@ pure_initcall(bpf_jit_charge_init);
int bpf_jit_charge_modmem(u32 size)
{
- if (atomic_long_add_return(size, &bpf_jit_current) > bpf_jit_limit) {
+ if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) {
if (!bpf_capable()) {
atomic_long_sub(size, &bpf_jit_current);
return -EPERM;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a4d40d98428a..27760627370d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5197,7 +5197,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_sys_bpf:
- return &bpf_sys_bpf_proto;
+ return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
case BPF_FUNC_btf_find_by_name_kind:
return &bpf_btf_find_by_name_kind_proto;
case BPF_FUNC_sys_close:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 096fdac70165..3eadb14e090b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6066,6 +6066,9 @@ skip_type_check:
return -EACCES;
}
meta->mem_size = reg->var_off.value;
+ err = mark_chain_precision(env, regno);
+ if (err)
+ return err;
break;
case ARG_PTR_TO_INT:
case ARG_PTR_TO_LONG:
@@ -7030,8 +7033,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
struct bpf_reg_state *regs = cur_regs(env), *reg;
struct bpf_map *map = meta->map_ptr;
- struct tnum range;
- u64 val;
+ u64 val, max;
int err;
if (func_id != BPF_FUNC_tail_call)
@@ -7041,10 +7043,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return -EINVAL;
}
- range = tnum_range(0, map->max_entries - 1);
reg = &regs[BPF_REG_3];
+ val = reg->var_off.value;
+ max = map->max_entries;
- if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
+ if (!(register_is_const(reg) && val < max)) {
bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
return 0;
}
@@ -7052,8 +7055,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
err = mark_chain_precision(env, BPF_REG_3);
if (err)
return err;
-
- val = reg->var_off.value;
if (bpf_map_key_unseen(aux))
bpf_map_key_store(aux, val);
else if (!bpf_map_key_poisoned(aux) &&
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 2ade21b54dc4..ff6a8099eb2a 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -59,6 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
int retval = 0;
mutex_lock(&cgroup_mutex);
+ cpus_read_lock();
percpu_down_write(&cgroup_threadgroup_rwsem);
for_each_root(root) {
struct cgroup *from_cgrp;
@@ -72,6 +73,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
break;
}
percpu_up_write(&cgroup_threadgroup_rwsem);
+ cpus_read_unlock();
mutex_unlock(&cgroup_mutex);
return retval;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ffaccd6373f1..5f2090d051ac 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1820,6 +1820,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
if (ss->css_rstat_flush) {
list_del_rcu(&css->rstat_css_node);
+ synchronize_rcu();
list_add_rcu(&css->rstat_css_node,
&dcgrp->rstat_css_list);
}
@@ -2370,6 +2371,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
EXPORT_SYMBOL_GPL(task_cgroup_path);
/**
+ * cgroup_attach_lock - Lock for ->attach()
+ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
+ *
+ * cgroup migration sometimes needs to stabilize threadgroups against forks and
+ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach()
+ * implementations (e.g. cpuset), also need to disable CPU hotplug.
+ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can
+ * lead to deadlocks.
+ *
+ * Bringing up a CPU may involve creating and destroying tasks which requires
+ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
+ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while
+ * write-locking threadgroup_rwsem, the locking order is reversed and we end up
+ * waiting for an on-going CPU hotplug operation which in turn is waiting for
+ * the threadgroup_rwsem to be released to create new tasks. For more details:
+ *
+ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu
+ *
+ * Resolve the situation by always acquiring cpus_read_lock() before optionally
+ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
+ * CPU hotplug is disabled on entry.
+ */
+static void cgroup_attach_lock(bool lock_threadgroup)
+{
+ cpus_read_lock();
+ if (lock_threadgroup)
+ percpu_down_write(&cgroup_threadgroup_rwsem);
+}
+
+/**
+ * cgroup_attach_unlock - Undo cgroup_attach_lock()
+ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
+ */
+static void cgroup_attach_unlock(bool lock_threadgroup)
+{
+ if (lock_threadgroup)
+ percpu_up_write(&cgroup_threadgroup_rwsem);
+ cpus_read_unlock();
+}
+
+/**
* cgroup_migrate_add_task - add a migration target task to a migration context
* @task: target task
* @mgctx: target migration context
@@ -2841,8 +2883,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
}
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
- bool *locked)
- __acquires(&cgroup_threadgroup_rwsem)
+ bool *threadgroup_locked)
{
struct task_struct *tsk;
pid_t pid;
@@ -2859,12 +2900,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
* Therefore, we can skip the global lock.
*/
lockdep_assert_held(&cgroup_mutex);
- if (pid || threadgroup) {
- percpu_down_write(&cgroup_threadgroup_rwsem);
- *locked = true;
- } else {
- *locked = false;
- }
+ *threadgroup_locked = pid || threadgroup;
+ cgroup_attach_lock(*threadgroup_locked);
rcu_read_lock();
if (pid) {
@@ -2895,17 +2932,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
goto out_unlock_rcu;
out_unlock_threadgroup:
- if (*locked) {
- percpu_up_write(&cgroup_threadgroup_rwsem);
- *locked = false;
- }
+ cgroup_attach_unlock(*threadgroup_locked);
+ *threadgroup_locked = false;
out_unlock_rcu:
rcu_read_unlock();
return tsk;
}
-void cgroup_procs_write_finish(struct task_struct *task, bool locked)
- __releases(&cgroup_threadgroup_rwsem)
+void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked)
{
struct cgroup_subsys *ss;
int ssid;
@@ -2913,8 +2947,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked)
/* release reference from cgroup_procs_write_start() */
put_task_struct(task);
- if (locked)
- percpu_up_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_unlock(threadgroup_locked);
+
for_each_subsys(ss, ssid)
if (ss->post_attach)
ss->post_attach();
@@ -3000,8 +3034,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
* write-locking can be skipped safely.
*/
has_tasks = !list_empty(&mgctx.preloaded_src_csets);
- if (has_tasks)
- percpu_down_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_lock(has_tasks);
/* NULL dst indicates self on default hierarchy */
ret = cgroup_migrate_prepare_dst(&mgctx);
@@ -3022,8 +3055,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
ret = cgroup_migrate_execute(&mgctx);
out_finish:
cgroup_migrate_finish(&mgctx);
- if (has_tasks)
- percpu_up_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_unlock(has_tasks);
return ret;
}
@@ -3698,7 +3730,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
}
psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
- new = psi_trigger_create(psi, buf, nbytes, res);
+ new = psi_trigger_create(psi, buf, res);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
@@ -4971,13 +5003,13 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
struct task_struct *task;
const struct cred *saved_cred;
ssize_t ret;
- bool locked;
+ bool threadgroup_locked;
dst_cgrp = cgroup_kn_lock_live(of->kn, false);
if (!dst_cgrp)
return -ENODEV;
- task = cgroup_procs_write_start(buf, threadgroup, &locked);
+ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked);
ret = PTR_ERR_OR_ZERO(task);
if (ret)
goto out_unlock;
@@ -5003,7 +5035,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
ret = cgroup_attach_task(dst_cgrp, task, threadgroup);
out_finish:
- cgroup_procs_write_finish(task, locked);
+ cgroup_procs_write_finish(task, threadgroup_locked);
out_unlock:
cgroup_kn_unlock(of->kn);
@@ -6017,6 +6049,9 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!kn)
goto out;
+ if (kernfs_type(kn) != KERNFS_DIR)
+ goto put;
+
rcu_read_lock();
cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
@@ -6024,7 +6059,7 @@ struct cgroup *cgroup_get_from_id(u64 id)
cgrp = NULL;
rcu_read_unlock();
-
+put:
kernfs_put(kn);
out:
return cgrp;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 58aadfda9b8b..1f3a55297f39 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2289,7 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
cgroup_taskset_first(tset, &css);
cs = css_cs(css);
- cpus_read_lock();
+ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
@@ -2343,7 +2343,6 @@ static void cpuset_attach(struct cgroup_taskset *tset)
wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
- cpus_read_unlock();
}
/* The various types of files and directories in a cpuset file system */
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 07b26df453a9..a0eb4d5cf557 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -494,6 +494,7 @@ static int __init crash_save_vmcoreinfo_init(void)
#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
+ VMCOREINFO_SYMBOL(kallsyms_num_syms);
VMCOREINFO_SYMBOL(kallsyms_token_table);
VMCOREINFO_SYMBOL(kallsyms_token_index);
#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 2caafd13f8aa..18c93c2276ca 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -350,11 +350,10 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
unsigned long *flags)
{
- unsigned int max_range = dma_get_max_seg_size(ref->dev);
struct dma_debug_entry *entry, index = *ref;
- unsigned int range = 0;
+ int limit = min(HASH_SIZE, (index.dev_addr >> HASH_FN_SHIFT) + 1);
- while (range <= max_range) {
+ for (int i = 0; i < limit; i++) {
entry = __hash_bucket_find(*bucket, ref, containing_match);
if (entry)
@@ -364,7 +363,6 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
* Nothing found, go back a hash bucket
*/
put_hash_bucket(*bucket, *flags);
- range += (1 << HASH_FN_SHIFT);
index.dev_addr -= (1 << HASH_FN_SHIFT);
*bucket = get_hash_bucket(&index, flags);
}
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 49cbf3e33de7..27f272381cf2 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -707,7 +707,7 @@ int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma,
}
EXPORT_SYMBOL_GPL(dma_mmap_noncontiguous);
-int dma_supported(struct device *dev, u64 mask)
+static int dma_supported(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
@@ -721,7 +721,6 @@ int dma_supported(struct device *dev, u64 mask)
return 1;
return ops->dma_supported(dev, mask);
}
-EXPORT_SYMBOL(dma_supported);
bool dma_pci_p2pdma_supported(struct device *dev)
{
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c5a9190b218f..0ef6b12f961d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -326,9 +326,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
swiotlb_adjust_nareas(num_possible_cpus());
nslabs = default_nslabs;
- if (nslabs < IO_TLB_MIN_SLABS)
- panic("%s: nslabs = %lu too small\n", __func__, nslabs);
-
/*
* By default allocate the bounce buffer memory from low memory, but
* allow to pick a location everywhere for hypervisors with guest
@@ -341,8 +338,7 @@ retry:
else
tlb = memblock_alloc_low(bytes, PAGE_SIZE);
if (!tlb) {
- pr_warn("%s: Failed to allocate %zu bytes tlb structure\n",
- __func__, bytes);
+ pr_warn("%s: failed to allocate tlb structure\n", __func__);
return;
}
@@ -579,7 +575,10 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
}
}
-#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
+static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
+{
+ return start + (idx << IO_TLB_SHIFT);
+}
/*
* Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
@@ -765,7 +764,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
/*
* When dir == DMA_FROM_DEVICE we could omit the copy from the orig
* to the tlb buffer, if we knew for sure the device will
- * overwirte the entire current content. But we don't. Thus
+ * overwrite the entire current content. But we don't. Thus
* unconditional bounce may prevent leaking swiotlb content (i.e.
* kernel memory) to user-space.
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index 90c85b17bf69..2b6bd511c6ed 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1225,6 +1225,7 @@ void mmput_async(struct mm_struct *mm)
schedule_work(&mm->async_put_work);
}
}
+EXPORT_SYMBOL_GPL(mmput_async);
#endif
/**
@@ -2046,11 +2047,8 @@ static __latent_entropy struct task_struct *copy_process(
/*
* If the new process will be in a different time namespace
* do not allow it to share VM or a thread group with the forking task.
- *
- * On vfork, the child process enters the target time namespace only
- * after exec.
*/
- if ((clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) {
+ if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
if (nsp->time_ns != nsp->time_ns_for_children)
return ERR_PTR(-EINVAL);
}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 80697e5e03e4..ca9d834d0b84 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1562,6 +1562,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
/* Ensure it is not in reserved area nor out of text */
if (!(core_kernel_text((unsigned long) p->addr) ||
is_module_text_address((unsigned long) p->addr)) ||
+ in_gate_area_no_mm((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
static_call_text_reserved(p->addr, p->addr) ||
@@ -1707,11 +1708,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
/* Try to disarm and disable this/parent probe */
if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
/*
- * If 'kprobes_all_disarmed' is set, 'orig_p'
- * should have already been disarmed, so
- * skip unneed disarming process.
+ * Don't be lazy here. Even if 'kprobes_all_disarmed'
+ * is false, 'orig_p' might not have been armed yet.
+ * Note arm_all_kprobes() __tries__ to arm all kprobes
+ * on the best effort basis.
*/
- if (!kprobes_all_disarmed) {
+ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
ret = disarm_kprobe(orig_p, true);
if (ret) {
p->flags &= ~KPROBE_FLAG_DISABLED;
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 6a477c622544..a4e4d84b6f4e 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2099,7 +2099,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->static_call_sites),
&mod->num_static_call_sites);
#endif
-#ifdef CONFIG_KUNIT
+#if IS_ENABLED(CONFIG_KUNIT)
mod->kunit_suites = section_objs(info, ".kunit_test_suites",
sizeof(*mod->kunit_suites),
&mod->num_kunit_suites);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index b4cbb406bc28..eec72ca962e2 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -179,8 +179,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
if (IS_ERR(new_ns))
return PTR_ERR(new_ns);
- if ((flags & CLONE_VM) == 0)
- timens_on_fork(new_ns, tsk);
+ timens_on_fork(new_ns, tsk);
tsk->nsproxy = new_ns;
return 0;
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index bb3d63bdf4ae..667876da8382 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -416,7 +416,7 @@ void update_sched_domain_debugfs(void)
char buf[32];
snprintf(buf, sizeof(buf), "cpu%d", cpu);
- debugfs_remove(debugfs_lookup(buf, sd_dentry));
+ debugfs_lookup_and_remove(buf, sd_dentry);
d_cpu = debugfs_create_dir(buf, sd_dentry);
i = 0;
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index ec66b40bdd40..ecb4b4ff4ce0 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -190,12 +190,8 @@ static void group_init(struct psi_group *group)
/* Init trigger-related members */
mutex_init(&group->trigger_lock);
INIT_LIST_HEAD(&group->triggers);
- memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
- group->poll_states = 0;
group->poll_min_period = U32_MAX;
- memset(group->polling_total, 0, sizeof(group->polling_total));
group->polling_next_update = ULLONG_MAX;
- group->polling_until = 0;
init_waitqueue_head(&group->poll_wait);
timer_setup(&group->poll_timer, poll_timer_fn, 0);
rcu_assign_pointer(group->poll_task, NULL);
@@ -957,7 +953,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
if (static_branch_likely(&psi_disabled))
return 0;
- cgroup->psi = kmalloc(sizeof(struct psi_group), GFP_KERNEL);
+ cgroup->psi = kzalloc(sizeof(struct psi_group), GFP_KERNEL);
if (!cgroup->psi)
return -ENOMEM;
@@ -1091,7 +1087,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
}
struct psi_trigger *psi_trigger_create(struct psi_group *group,
- char *buf, size_t nbytes, enum psi_res res)
+ char *buf, enum psi_res res)
{
struct psi_trigger *t;
enum psi_states state;
@@ -1320,7 +1316,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
return -EBUSY;
}
- new = psi_trigger_create(&psi_system, buf, nbytes, res);
+ new = psi_trigger_create(&psi_system, buf, res);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index d4788f810b55..0b1cd985dc27 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
ret = (*action)(&wbq_entry->key, mode);
- } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+ } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
finish_wait(wq_head, &wbq_entry->wq_entry);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a492f159624f..860b2dcf3ac4 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -277,6 +277,7 @@ COND_SYSCALL(landlock_restrict_self);
/* mm/fadvise.c */
COND_SYSCALL(fadvise64_64);
+COND_SYSCALL_COMPAT(fadvise64_64);
/* mm/, CONFIG_MMU only */
COND_SYSCALL(swapon);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 126c769d36c3..439e2ab6905e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1861,8 +1861,6 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
ftrace_hash_rec_update_modify(ops, filter_hash, 1);
}
-static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
-
/*
* Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
* or no-needed to update, -EBUSY if it detects a conflict of the flag
@@ -3118,49 +3116,6 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
ftrace_hash_empty(ops->func_hash->notrace_hash);
}
-/*
- * Check if the current ops references the given ip.
- *
- * If the ops traces all functions, then it was already accounted for.
- * If the ops does not trace the current record function, skip it.
- * If the ops ignores the function via notrace filter, skip it.
- */
-static bool
-ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
-{
- /* If ops isn't enabled, ignore it */
- if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
- return false;
-
- /* If ops traces all then it includes this function */
- if (ops_traces_mod(ops))
- return true;
-
- /* The function must be in the filter */
- if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
- !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
- return false;
-
- /* If in notrace hash, we ignore it too */
- if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
- return false;
-
- return true;
-}
-
-/*
- * Check if the current ops references the record.
- *
- * If the ops traces all functions, then it was already accounted for.
- * If the ops does not trace the current record function, skip it.
- * If the ops ignores the function via notrace filter, skip it.
- */
-static bool
-ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
-{
- return ops_references_ip(ops, rec->ip);
-}
-
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
bool init_nop = ftrace_need_init_nop();
@@ -6822,6 +6777,38 @@ static int ftrace_get_trampoline_kallsym(unsigned int symnum,
return -ERANGE;
}
+#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) || defined(CONFIG_MODULES)
+/*
+ * Check if the current ops references the given ip.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static bool
+ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
+{
+ /* If ops isn't enabled, ignore it */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return false;
+
+ /* If ops traces all then it includes this function */
+ if (ops_traces_mod(ops))
+ return true;
+
+ /* The function must be in the filter */
+ if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
+ !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
+ return false;
+
+ /* If in notrace hash, we ignore it too */
+ if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
+ return false;
+
+ return true;
+}
+#endif
+
#ifdef CONFIG_MODULES
#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
@@ -6834,7 +6821,7 @@ static int referenced_filters(struct dyn_ftrace *rec)
int cnt = 0;
for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
- if (ops_references_rec(ops, rec)) {
+ if (ops_references_ip(ops, rec->ip)) {
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT))
continue;
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY))
diff --git a/kernel/trace/rv/monitors/wip/wip.h b/kernel/trace/rv/monitors/wip/wip.h
index c1c47e2305ef..dacc37b62a2c 100644
--- a/kernel/trace/rv/monitors/wip/wip.h
+++ b/kernel/trace/rv/monitors/wip/wip.h
@@ -27,7 +27,7 @@ struct automaton_wip {
bool final_states[state_max_wip];
};
-struct automaton_wip automaton_wip = {
+static struct automaton_wip automaton_wip = {
.state_names = {
"preemptive",
"non_preemptive"
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.h b/kernel/trace/rv/monitors/wwnr/wwnr.h
index d1afe55cdd4c..118e576b91b4 100644
--- a/kernel/trace/rv/monitors/wwnr/wwnr.h
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.h
@@ -27,7 +27,7 @@ struct automaton_wwnr {
bool final_states[state_max_wwnr];
};
-struct automaton_wwnr automaton_wwnr = {
+static struct automaton_wwnr automaton_wwnr = {
.state_names = {
"not_running",
"running"
diff --git a/kernel/trace/rv/reactor_panic.c b/kernel/trace/rv/reactor_panic.c
index b698d05dd069..d65f6c25a87c 100644
--- a/kernel/trace/rv/reactor_panic.c
+++ b/kernel/trace/rv/reactor_panic.c
@@ -24,13 +24,13 @@ static struct rv_reactor rv_panic = {
.react = rv_panic_reaction
};
-static int register_react_panic(void)
+static int __init register_react_panic(void)
{
rv_register_reactor(&rv_panic);
return 0;
}
-static void unregister_react_panic(void)
+static void __exit unregister_react_panic(void)
{
rv_unregister_reactor(&rv_panic);
}
diff --git a/kernel/trace/rv/reactor_printk.c b/kernel/trace/rv/reactor_printk.c
index 31899f953af4..4b6b7106a477 100644
--- a/kernel/trace/rv/reactor_printk.c
+++ b/kernel/trace/rv/reactor_printk.c
@@ -23,13 +23,13 @@ static struct rv_reactor rv_printk = {
.react = rv_printk_reaction
};
-static int register_react_printk(void)
+static int __init register_react_printk(void)
{
rv_register_reactor(&rv_printk);
return 0;
}
-static void unregister_react_printk(void)
+static void __exit unregister_react_printk(void)
{
rv_unregister_reactor(&rv_printk);
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index cb866c3141af..918730d74932 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -142,7 +142,8 @@ static bool check_user_trigger(struct trace_event_file *file)
{
struct event_trigger_data *data;
- list_for_each_entry_rcu(data, &file->triggers, list) {
+ list_for_each_entry_rcu(data, &file->triggers, list,
+ lockdep_is_held(&event_mutex)) {
if (data->flags & EVENT_TRIGGER_FL_PROBE)
continue;
return true;
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index 95b58bd757ce..1e130da1b742 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -95,14 +95,14 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
}
lockdep_hardirqs_on_prepare();
- lockdep_hardirqs_on(CALLER_ADDR0);
+ lockdep_hardirqs_on(caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
NOKPROBE_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
- lockdep_hardirqs_off(CALLER_ADDR0);
+ lockdep_hardirqs_off(caller_addr);
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 64ea283f2f86..ef42c1a11920 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -571,7 +571,8 @@ static void for_each_tracepoint_range(
bool trace_module_has_bad_taint(struct module *mod)
{
return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
- (1 << TAINT_UNSIGNED_MODULE));
+ (1 << TAINT_UNSIGNED_MODULE) |
+ (1 << TAINT_TEST));
}
static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);
@@ -647,7 +648,7 @@ static int tracepoint_module_coming(struct module *mod)
/*
* We skip modules that taint the kernel, especially those with different
* module headers (for forced load), to make sure we don't cause a crash.
- * Staging, out-of-tree, and unsigned GPL modules are fine.
+ * Staging, out-of-tree, unsigned GPL, and test modules are fine.
*/
if (trace_module_has_bad_taint(mod))
return 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index aeea9731ef80..39060a5d0905 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3066,10 +3066,8 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
if (WARN_ON(!work->func))
return false;
- if (!from_cancel) {
- lock_map_acquire(&work->lockdep_map);
- lock_map_release(&work->lockdep_map);
- }
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);
if (start_flush_work(work, &barr, from_cancel)) {
wait_for_completion(&barr.done);