Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--  include/linux/sched.h  254
1 file changed, 203 insertions(+), 51 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca3f3eae8980..89541d248893 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,8 +25,10 @@
#include <linux/latencytop.h>
#include <linux/sched/prio.h>
#include <linux/signal_types.h>
+#include <linux/psi_types.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
+#include <linux/rseq.h>
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -117,7 +119,7 @@ struct task_group;
* the comment with set_special_state().
*/
#define is_special_task_state(state) \
- ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
#define __set_current_state(state_value) \
do { \
@@ -166,15 +168,15 @@ struct task_group;
* need_sleep = false;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
- * Where wake_up_state() (and all other wakeup primitives) imply enough
- * barriers to order the store of the variable against wakeup.
+ * where wake_up_state() executes a full memory barrier before accessing the
+ * task state.
*
* Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
*
* However, with slightly different timing the wakeup TASK_RUNNING store can
- * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
+ * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
* a problem either because that will result in one extra go around the loop
* and our @cond test will save the day.
*
@@ -513,7 +515,7 @@ struct sched_dl_entity {
/*
* Actual scheduling parameters. Initialized with the values above,
- * they are continously updated during task execution. Note that
+ * they are continuously updated during task execution. Note that
* the remaining runtime could be < 0 in case we are in overrun.
*/
s64 runtime; /* Remaining runtime for this instance */
@@ -570,10 +572,8 @@ union rcu_special {
struct {
u8 blocked;
u8 need_qs;
- u8 exp_need_qs;
-
- /* Otherwise the compiler can store garbage here: */
- u8 pad;
+ u8 exp_hint; /* Hint for performance. */
+ u8 pad; /* No garbage from compiler! */
} b; /* Bits. */
u32 s; /* Set of bits. */
};
@@ -709,6 +709,10 @@ struct task_struct {
unsigned sched_contributes_to_load:1;
unsigned sched_migrated:1;
unsigned sched_remote_wakeup:1;
+#ifdef CONFIG_PSI
+ unsigned sched_psi_wake_requeue:1;
+#endif
+
/* Force alignment to the next boundary: */
unsigned :0;
@@ -721,10 +725,7 @@ struct task_struct {
unsigned restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG
- unsigned memcg_may_oom:1;
-#ifndef CONFIG_SLOB
- unsigned memcg_kmem_skip_account:1;
-#endif
+ unsigned in_user_fault:1;
#endif
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
@@ -733,6 +734,16 @@ struct task_struct {
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
#endif
+#ifdef CONFIG_BLK_CGROUP
+ /* to be used once the psi infrastructure lands upstream. */
+ unsigned use_memdelay:1;
+#endif
+
+ /*
+ * May usercopy functions fault on kernel addresses?
+ * This is not just a single bit because this can potentially nest.
+ */
+ unsigned int kernel_uaccess_faults_ok;
unsigned long atomic_flags; /* Flags requiring atomic access. */
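A hedged sketch of how the nesting counter is meant to be used (the probe function below is hypothetical, not from this diff): a caller brackets a region where a usercopy routine may legitimately be handed a kernel address, and because the field is a count rather than a single bit, such regions can nest.

/* Hypothetical probe: read a word that may live at a kernel address. */
static unsigned long probe_word_sketch(const void *addr)
{
	unsigned long val = 0;

	current->kernel_uaccess_faults_ok++;	/* a count, so sections can nest */
	if (copy_from_user(&val, (const void __user *)addr, sizeof(val)))
		val = 0;
	current->kernel_uaccess_faults_ok--;

	return val;
}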
@@ -741,7 +752,7 @@ struct task_struct {
pid_t pid;
pid_t tgid;
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
/* Canary value for the -fstack-protector GCC feature: */
unsigned long stack_canary;
#endif
@@ -774,7 +785,8 @@ struct task_struct {
struct list_head ptrace_entry;
/* PID/PID hash table linkage. */
- struct pid_link pids[PIDTYPE_MAX];
+ struct pid *thread_pid;
+ struct hlist_node pid_links[PIDTYPE_MAX];
struct list_head thread_group;
struct list_head thread_node;
@@ -848,6 +860,7 @@ struct task_struct {
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
unsigned long last_switch_count;
+ unsigned long last_switch_time;
#endif
/* Filesystem information: */
struct fs_struct *fs;
@@ -953,9 +966,13 @@ struct task_struct {
/* Ptrace state: */
unsigned long ptrace_message;
- siginfo_t *last_siginfo;
+ kernel_siginfo_t *last_siginfo;
struct task_io_accounting ioac;
+#ifdef CONFIG_PSI
+ /* Pressure stall state */
+ unsigned int psi_flags;
+#endif
#ifdef CONFIG_TASK_XACCT
/* Accumulated RSS usage: */
u64 acct_rss_mem1;
@@ -978,7 +995,7 @@ struct task_struct {
/* cg_list protected by css_set_lock and tsk->alloc_lock: */
struct list_head cg_list;
#endif
-#ifdef CONFIG_INTEL_RDT
+#ifdef CONFIG_RESCTRL
u32 closid;
u32 rmid;
#endif
@@ -1016,7 +1033,6 @@ struct task_struct {
u64 last_sum_exec_runtime;
struct callback_head numa_work;
- struct list_head numa_entry;
struct numa_group *numa_group;
/*
@@ -1047,6 +1063,17 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_RSEQ
+ struct rseq __user *rseq;
+ u32 rseq_len;
+ u32 rseq_sig;
+ /*
+ * RmW on rseq_event_mask must be performed atomically
+ * with respect to preemption.
+ */
+ unsigned long rseq_event_mask;
+#endif
+
struct tlbflush_unmap_batch tlb_ubc;
struct rcu_head rcu;
@@ -1091,6 +1118,7 @@ struct task_struct {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/* Index of current stored address in ret_stack: */
int curr_ret_stack;
+ int curr_ret_depth;
/* Stack of return addresses for return function tracing: */
struct ftrace_ret_stack *ret_stack;
@@ -1118,7 +1146,7 @@ struct task_struct {
#ifdef CONFIG_KCOV
/* Coverage collection mode enabled for this task (0 if disabled): */
- enum kcov_mode kcov_mode;
+ unsigned int kcov_mode;
/* Size of the kcov_area: */
unsigned int kcov_size;
@@ -1137,6 +1165,13 @@ struct task_struct {
/* Number of pages to reclaim on returning to userland: */
unsigned int memcg_nr_pages_over_high;
+
+ /* Used by memcontrol for targeted memcg charge: */
+ struct mem_cgroup *active_memcg;
+#endif
+
+#ifdef CONFIG_BLK_CGROUP
+ struct request_queue *throttle_queue;
#endif
#ifdef CONFIG_UPROBES
@@ -1168,6 +1203,11 @@ struct task_struct {
void *security;
#endif
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ unsigned long lowest_stack;
+ unsigned long prev_lowest_stack;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
@@ -1187,27 +1227,7 @@ struct task_struct {
static inline struct pid *task_pid(struct task_struct *task)
{
- return task->pids[PIDTYPE_PID].pid;
-}
-
-static inline struct pid *task_tgid(struct task_struct *task)
-{
- return task->group_leader->pids[PIDTYPE_PID].pid;
-}
-
-/*
- * Without tasklist or RCU lock it is not safe to dereference
- * the result of task_pgrp/task_session even if task == current,
- * we can race with another thread doing sys_setsid/sys_setpgid.
- */
-static inline struct pid *task_pgrp(struct task_struct *task)
-{
- return task->group_leader->pids[PIDTYPE_PGID].pid;
-}
-
-static inline struct pid *task_session(struct task_struct *task)
-{
- return task->group_leader->pids[PIDTYPE_SID].pid;
+ return task->thread_pid;
}
/*
@@ -1256,7 +1276,7 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
*/
static inline int pid_alive(const struct task_struct *p)
{
- return p->pids[PIDTYPE_PID].pid != NULL;
+ return p->thread_pid != NULL;
}
static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
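The helpers dropped above (task_tgid(), task_pgrp(), task_session()) are not gone for good: this series moves the per-type pid pointers into signal_struct, and they can be re-expressed there. A hedged sketch of the new shape, assuming the signal_struct layout introduced elsewhere in the series (not visible in this hunk):

static inline struct pid *task_pgrp(struct task_struct *task)
{
	return task->signal->pids[PIDTYPE_PGID];
}

static inline struct pid *task_session(struct task_struct *task)
{
	return task->signal->pids[PIDTYPE_SID];
}

As before, dereferencing the result still requires the tasklist lock or RCU, since another thread can race with sys_setsid()/sys_setpgid().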
@@ -1282,12 +1302,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk)
static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
- return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
+ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
}
static inline pid_t task_tgid_vnr(struct task_struct *tsk)
{
- return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
+ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
}
static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
@@ -1385,6 +1405,7 @@ extern struct pid *cad_pid;
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
+#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
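PF_MEMSTALL marks a task that is currently stalled waiting for memory, and the PSI accounting added by this series keys off it. A hedged sketch of the intended bracketing (psi_memstall_enter()/psi_memstall_leave() come from the PSI patches, not from this header, and the caller below is hypothetical):

static void reclaim_with_pressure_accounting(void)
{
	unsigned long pflags;

	psi_memstall_enter(&pflags);	/* sets PF_MEMSTALL, saving the prior state */
	/* ... direct reclaim / compaction work that stalls on memory ... */
	psi_memstall_leave(&pflags);	/* restores the saved PF_MEMSTALL state */
}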
@@ -1435,6 +1456,8 @@ static inline bool is_percpu_thread(void)
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
+#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */
+#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
@@ -1466,6 +1489,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
+
+TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
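Each TASK_PFA_* line above generates one small accessor over p->atomic_flags. Written out by hand for the new SPEC_IB_DISABLE flag, the triplet expands to roughly the following; note that, as with SPEC_SSB_FORCE_DISABLE, no CLEAR accessor is generated for the force-disable variant, so once set it cannot be undone:

static inline bool task_spec_ib_disable(struct task_struct *p)
{
	return test_bit(PFA_SPEC_IB_DISABLE, &p->atomic_flags);
}

static inline void task_set_spec_ib_disable(struct task_struct *p)
{
	set_bit(PFA_SPEC_IB_DISABLE, &p->atomic_flags);
}

static inline void task_clear_spec_ib_disable(struct task_struct *p)
{
	clear_bit(PFA_SPEC_IB_DISABLE, &p->atomic_flags);
}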
@@ -1512,6 +1542,7 @@ static inline int task_nice(const struct task_struct *p)
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
+extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
@@ -1626,6 +1657,12 @@ static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
clear_ti_thread_flag(task_thread_info(tsk), flag);
}
+static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
+ bool value)
+{
+ update_ti_thread_flag(task_thread_info(tsk), flag, value);
+}
+
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
@@ -1661,7 +1698,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
* explicit rescheduling in places that are safe. The return
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
- * cond_resched_softirq() will enable bhs before scheduling.
*/
#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
@@ -1681,13 +1717,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
__cond_resched_lock(lock); \
})
-extern int __cond_resched_softirq(void);
-
-#define cond_resched_softirq() ({ \
- ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
- __cond_resched_softirq(); \
-})
-
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
@@ -1764,4 +1793,127 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
+#ifdef CONFIG_RSEQ
+
+/*
+ * Map the event mask on the user-space ABI enum rseq_cs_flags
+ * for direct mask checks.
+ */
+enum rseq_event_mask_bits {
+ RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
+ RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
+ RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
+};
+
+enum rseq_event_mask {
+ RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
+ RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
+ RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
+};
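/*
 * Illustrative sketch, not part of this diff: because the event bits reuse
 * the uapi RSEQ_CS_FLAG_NO_RESTART_ON_* bit positions, deciding whether a
 * critical section must be restarted reduces to one mask test against the
 * flags of the current critical section. The real check lives in
 * kernel/rseq.c; the helper name below is hypothetical.
 */
static inline bool rseq_event_needs_restart(struct task_struct *t, u32 cs_flags)
{
	unsigned long event_mask;

	/* Snapshot and clear the mask atomically with respect to preemption. */
	preempt_disable();
	event_mask = t->rseq_event_mask;
	t->rseq_event_mask = 0;
	preempt_enable();

	/* Any recorded event the section did not opt out of forces a restart. */
	return !!(event_mask & ~(unsigned long)cs_flags);
}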
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+ if (t->rseq)
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+}
+
+void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
+
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+ if (current->rseq)
+ __rseq_handle_notify_resume(ksig, regs);
+}
+
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+ preempt_disable();
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
+ preempt_enable();
+ rseq_handle_notify_resume(ksig, regs);
+}
+
+/* rseq_preempt() requires preemption to be disabled. */
+static inline void rseq_preempt(struct task_struct *t)
+{
+ __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
+ rseq_set_notify_resume(t);
+}
+
+/* rseq_migrate() requires preemption to be disabled. */
+static inline void rseq_migrate(struct task_struct *t)
+{
+ __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
+ rseq_set_notify_resume(t);
+}
+
+/*
+ * If parent process has a registered restartable sequences area, the
+ * child inherits. Only applies when forking a process, not a thread.
+ */
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+ if (clone_flags & CLONE_THREAD) {
+ t->rseq = NULL;
+ t->rseq_len = 0;
+ t->rseq_sig = 0;
+ t->rseq_event_mask = 0;
+ } else {
+ t->rseq = current->rseq;
+ t->rseq_len = current->rseq_len;
+ t->rseq_sig = current->rseq_sig;
+ t->rseq_event_mask = current->rseq_event_mask;
+ }
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+ t->rseq = NULL;
+ t->rseq_len = 0;
+ t->rseq_sig = 0;
+ t->rseq_event_mask = 0;
+}
+
+#else
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+}
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+}
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+ struct pt_regs *regs)
+{
+}
+static inline void rseq_preempt(struct task_struct *t)
+{
+}
+static inline void rseq_migrate(struct task_struct *t)
+{
+}
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+}
+static inline void rseq_execve(struct task_struct *t)
+{
+}
+
+#endif
+
+#ifdef CONFIG_DEBUG_RSEQ
+
+void rseq_syscall(struct pt_regs *regs);
+
+#else
+
+static inline void rseq_syscall(struct pt_regs *regs)
+{
+}
+
+#endif
+
#endif