summary | refs | log | tree | commit | diff
path: root/include/linux
diff options
context:
space:
mode:
author: Brian Chen <brianchen118@gmail.com> 2021-11-11 00:33:12 +0300
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2022-01-27 13:04:27 +0300
commit: d168123f1388882bf5b41d07e5534a319f5fff1a (patch)
tree: cf56dc7baf840fdffda405f6fc96ee105fb6d7bb /include/linux
parent: 07888a4fbcf23dd79c2e361ded5b9bd98e881a7f (diff)
download: linux-d168123f1388882bf5b41d07e5534a319f5fff1a.tar.xz
psi: Fix PSI_MEM_FULL state when tasks are in memstall and doing reclaim
[ Upstream commit cb0e52b7748737b2cf6481fdd9b920ce7e1ebbdf ]

We've noticed cases where tasks in a cgroup are stalled on memory but
there is little memory FULL pressure since tasks stay on the runqueue
in reclaim.

A simple example involves a single threaded program that keeps leaking
and touching large amounts of memory. It runs in a cgroup with swap
enabled, memory.high set at 10M and cpu.max ratio set at 5%. Though
there is significant CPU pressure and memory SOME, there is barely any
memory FULL since the task enters reclaim and stays on the runqueue.
However, this memory-bound task is effectively stalled on memory and
we expect memory FULL to match memory SOME in this scenario.

The code is confused about memstall && running, thinking there is a
stalled task and a productive task when there's only one task: a
reclaimer that's counted as both.

To fix this, we redefine the condition for PSI_MEM_FULL to check that
all running tasks are in an active memstall instead of checking that
there are no running tasks.

        case PSI_MEM_FULL:
    -           return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]);
    +           return unlikely(tasks[NR_MEMSTALL] &&
    +                   tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);

This will capture reclaimers. It will also capture tasks that called
psi_memstall_enter() and are about to sleep, but this should be
negligible noise.

Signed-off-by: Brian Chen <brianchen118@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20211110213312.310243-1-brianchen118@gmail.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/psi_types.h 13
1 files changed, 12 insertions, 1 deletions
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 0a23300d49af..0819c82dba92 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -21,7 +21,17 @@ enum psi_task_count {
* don't have to special case any state tracking for it.
*/
NR_ONCPU,
- NR_PSI_TASK_COUNTS = 4,
+ /*
+ * For IO and CPU stalls the presence of running/oncpu tasks
+ * in the domain means a partial rather than a full stall.
+ * For memory it's not so simple because of page reclaimers:
+ * they are running/oncpu while representing a stall. To tell
+ * whether a domain has productivity left or not, we need to
+ * distinguish between regular running (i.e. productive)
+ * threads and memstall ones.
+ */
+ NR_MEMSTALL_RUNNING,
+ NR_PSI_TASK_COUNTS = 5,
};
/* Task state bitmasks */
@@ -29,6 +39,7 @@ enum psi_task_count {
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
#define TSK_RUNNING (1 << NR_RUNNING)
#define TSK_ONCPU (1 << NR_ONCPU)
+#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
/* Resources that workloads could be stalled on */
enum psi_res {