summaryrefslogtreecommitdiff
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c58
1 files changed, 32 insertions, 26 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index c41820481b2e..b4386ff6a641 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
#include <linux/kasan.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
+#include <linux/bpf.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -379,14 +380,17 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
void *stack = task_stack_page(tsk);
struct vm_struct *vm = task_stack_vm_area(tsk);
+ if (vm) {
+ int i;
- /* All stack pages are in the same node. */
- if (vm)
- mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
- else
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
+ account * (PAGE_SIZE / 1024));
+ } else {
+ /* All stack pages are in the same node. */
mod_lruvec_kmem_state(stack, NR_KERNEL_STACK_KB,
account * (THREAD_SIZE / 1024));
+ }
}
static int memcg_charge_kernel_stack(struct task_struct *tsk)
@@ -421,7 +425,7 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
static void release_task_stack(struct task_struct *tsk)
{
- if (WARN_ON(tsk->state != TASK_DEAD))
+ if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
account_kernel_stack(tsk, -1);
@@ -734,9 +738,11 @@ void __put_task_struct(struct task_struct *tsk)
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
+ bpf_task_storage_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
+ sched_core_free(tsk);
if (!profile_handoff_task(tsk))
free_task(tsk);
@@ -932,6 +938,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL;
tsk->wake_q.next = NULL;
+ tsk->pf_io_worker = NULL;
account_kernel_stack(tsk, 1);
@@ -1144,7 +1151,7 @@ void mmput_async(struct mm_struct *mm)
* invocations: in mmput() nobody alive left, in execve task is single
* threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
* mm->exe_file, but does so without using set_mm_exe_file() in order
- * to do avoid the need for any locks.
+ * to avoid the need for any locks.
*/
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
@@ -1395,7 +1402,6 @@ fail_nomem:
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
struct mm_struct *mm, *oldmm;
- int retval;
tsk->min_flt = tsk->maj_flt = 0;
tsk->nvcsw = tsk->nivcsw = 0;
@@ -1422,21 +1428,15 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
if (clone_flags & CLONE_VM) {
mmget(oldmm);
mm = oldmm;
- goto good_mm;
+ } else {
+ mm = dup_mm(tsk, current->mm);
+ if (!mm)
+ return -ENOMEM;
}
- retval = -ENOMEM;
- mm = dup_mm(tsk, current->mm);
- if (!mm)
- goto fail_nomem;
-
-good_mm:
tsk->mm = mm;
tsk->active_mm = mm;
return 0;
-
-fail_nomem:
- return retval;
}
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
@@ -1742,7 +1742,7 @@ static int pidfd_release(struct inode *inode, struct file *file)
* /proc/<pid>/status where Pid and NSpid are always shown relative to
* the pid namespace of the procfs instance. The difference becomes
* obvious when sending around a pidfd between pid namespaces from a
- * different branch of the tree, i.e. where no ancestoral relation is
+ * different branch of the tree, i.e. where no ancestral relation is
* present between the pid namespaces:
* - create two new pid namespaces ns1 and ns2 in the initial pid
* namespace (also take care to create new mount namespaces in the
@@ -1946,7 +1946,7 @@ static __latent_entropy struct task_struct *copy_process(
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
retval = -ERESTARTNOINTR;
- if (signal_pending(current))
+ if (task_sigpending(current))
goto fork_out;
retval = -ENOMEM;
@@ -2004,7 +2004,7 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_cleanup_count;
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
- p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
+ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE | PF_NO_SETAFFINITY);
p->flags |= PF_FORKNOEXEC;
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
@@ -2082,13 +2082,16 @@ static __latent_entropy struct task_struct *copy_process(
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
+#ifdef CONFIG_BPF_SYSCALL
+ RCU_INIT_POINTER(p->bpf_storage, NULL);
+#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
if (retval)
goto bad_fork_cleanup_policy;
- retval = perf_event_init_task(p);
+ retval = perf_event_init_task(p, clone_flags);
if (retval)
goto bad_fork_cleanup_policy;
retval = audit_alloc(p);
@@ -2251,6 +2254,8 @@ static __latent_entropy struct task_struct *copy_process(
klp_copy_process(p);
+ sched_core_fork(p);
+
spin_lock(&current->sighand->siglock);
/*
@@ -2338,6 +2343,7 @@ static __latent_entropy struct task_struct *copy_process(
return p;
bad_fork_cancel_cgroup:
+ sched_core_free(p);
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p, args);
@@ -2389,7 +2395,7 @@ bad_fork_cleanup_count:
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
exit_creds(p);
bad_fork_free:
- p->state = TASK_DEAD;
+ WRITE_ONCE(p->__state, TASK_DEAD);
put_task_stack(p);
delayed_free_task(p);
fork_out:
@@ -2409,7 +2415,7 @@ static inline void init_idle_pids(struct task_struct *idle)
}
}
-struct task_struct *fork_idle(int cpu)
+struct task_struct * __init fork_idle(int cpu)
{
struct task_struct *task;
struct kernel_clone_args args = {
@@ -2729,8 +2735,8 @@ static bool clone3_args_valid(struct kernel_clone_args *kargs)
return false;
/*
- * - make the CLONE_DETACHED bit reuseable for clone3
- * - make the CSIGNAL bits reuseable for clone3
+ * - make the CLONE_DETACHED bit reusable for clone3
+ * - make the CSIGNAL bits reusable for clone3
*/
if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
return false;