From 421fc858023b839106a9c0dc42cd8947cf981d83 Mon Sep 17 00:00:00 2001 From: Atul Kumar Pant Date: Mon, 6 Nov 2023 23:40:34 +0530 Subject: selftests: cgroup: Fixes a typo in a comment Signed-off-by: Atul Kumar Pant Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_freezer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index ff519029f6f4..8845353aca53 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -740,7 +740,7 @@ static int test_cgfreezer_ptraced(const char *root) /* * cg_check_frozen(cgroup, true) will fail here, - * because the task in in the TRACEd state. + * because the task is in the TRACEd state. */ if (cg_freeze_wait(cgroup, false)) goto cleanup; -- cgit v1.2.3 From 14060dfc481a309e3f236127b0a77abbf249648f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 25 Oct 2023 14:25:53 -0400 Subject: selftests/cgroup: Minor code cleanup and reorganization of test_cpuset_prs.sh Minor cleanup of test matrix and relocation of test_isolated() function to prepare for the next patch. There is no functional change. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 142 +++++++++++----------- 1 file changed, 71 insertions(+), 71 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index a6e9848189d6..2b825019f806 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -146,71 +146,6 @@ test_add_proc() echo $$ > $CGROUP2/cgroup.procs # Move out the task } -# -# Testing the new "isolated" partition root type -# -test_isolated() -{ - cd $CGROUP2/test - echo 2-3 > cpuset.cpus - TYPE=$(cat cpuset.cpus.partition) - [[ $TYPE = member ]] || echo member > cpuset.cpus.partition - - console_msg "Change from member to root" - test_partition root - - console_msg "Change from root to isolated" - test_partition isolated - - console_msg "Change from isolated to member" - test_partition member - - console_msg "Change from member to isolated" - test_partition isolated - - console_msg "Change from isolated to root" - test_partition root - - console_msg "Change from root to member" - test_partition member - - # - # Testing partition root with no cpu - # - console_msg "Distribute all cpus to child partition" - echo +cpuset > cgroup.subtree_control - test_partition root - - mkdir A1 - cd A1 - echo 2-3 > cpuset.cpus - test_partition root - test_effective_cpus 2-3 - cd .. - test_effective_cpus "" - - console_msg "Moving task to partition test" - test_add_proc "No space left" - cd A1 - test_add_proc "" - cd .. - - console_msg "Shrink and expand child partition" - cd A1 - echo 2 > cpuset.cpus - cd .. - test_effective_cpus 3 - cd A1 - echo 2-3 > cpuset.cpus - cd .. - test_effective_cpus "" - - # Cleaning up - console_msg "Cleaning up" - echo $$ > $CGROUP2/cgroup.procs - [[ -d A1 ]] && rmdir A1 -} - # # Cpuset controller state transition test matrix. # @@ -304,7 +239,7 @@ TEST_MATRIX=( A1:P0,A2:P2,A3:P1 2-4" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1 ." + A1:P0,A2:P-2,A3:P-1" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ A1:P0,A2:P2,A3:P-1 2-4" @@ -347,10 +282,10 @@ TEST_MATRIX=( # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" @@ -359,13 +294,13 @@ TEST_MATRIX=( A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -804,6 +739,71 @@ run_state_test() echo "All $I tests of $TEST PASSED." } +# +# Testing the new "isolated" partition root type +# +test_isolated() +{ + cd $CGROUP2/test + echo 2-3 > cpuset.cpus + TYPE=$(cat cpuset.cpus.partition) + [[ $TYPE = member ]] || echo member > cpuset.cpus.partition + + console_msg "Change from member to root" + test_partition root + + console_msg "Change from root to isolated" + test_partition isolated + + console_msg "Change from isolated to member" + test_partition member + + console_msg "Change from member to isolated" + test_partition isolated + + console_msg "Change from isolated to root" + test_partition root + + console_msg "Change from root to member" + test_partition member + + # + # Testing partition root with no cpu + # + console_msg "Distribute all cpus to child partition" + echo +cpuset > cgroup.subtree_control + test_partition root + + mkdir A1 + cd A1 + echo 2-3 > cpuset.cpus + test_partition root + test_effective_cpus 2-3 + cd .. + test_effective_cpus "" + + console_msg "Moving task to partition test" + test_add_proc "No space left" + cd A1 + test_add_proc "" + cd .. + + console_msg "Shrink and expand child partition" + cd A1 + echo 2 > cpuset.cpus + cd .. + test_effective_cpus 3 + cd A1 + echo 2-3 > cpuset.cpus + cd .. + test_effective_cpus "" + + # Cleaning up + console_msg "Cleaning up" + echo $$ > $CGROUP2/cgroup.procs + [[ -d A1 ]] && rmdir A1 +} + # # Wait for inotify event for the given file and read it # $1: cgroup file to wait for -- cgit v1.2.3 From 72c6303acfa1008c542e093bc9f9916fb99e0323 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 25 Oct 2023 14:25:55 -0400 Subject: cgroup/cpuset: Take isolated CPUs out of workqueue unbound cpumask To make CPUs in isolated cpuset partition closer in isolation to the boot time isolated CPUs specified in the "isolcpus" boot command line option, we need to take those CPUs out of the workqueue unbound cpumask so that work functions from the unbound workqueues won't run on those CPUs. Otherwise, they will interfere the user tasks running on those isolated CPUs. With the introduction of the workqueue_unbound_exclude_cpumask() helper function in an earlier commit, those isolated CPUs can now be taken out from the workqueue unbound cpumask. This patch also updates cgroup-v2.rst to mention that isolated CPUs will be excluded from unbound workqueue cpumask as well as updating test_cpuset_prs.sh to verify the correctness of the new *cpuset.cpus.isolated file, if available via cgroup_debug option. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 10 +- kernel/cgroup/cpuset.c | 116 ++++++++++++++++++---- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 74 ++++++++++++-- 3 files changed, 166 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 3f85254f3cef..cf5651a11df8 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2358,11 +2358,11 @@ Cpuset Interface Files partition or scheduling domain. The set of exclusive CPUs is determined by the value of its "cpuset.cpus.exclusive.effective". - When set to "isolated", the CPUs in that partition will - be in an isolated state without any load balancing from the - scheduler. Tasks placed in such a partition with multiple - CPUs should be carefully distributed and bound to each of the - individual CPUs for optimal performance. + When set to "isolated", the CPUs in that partition will be in + an isolated state without any load balancing from the scheduler + and excluded from the unbound workqueues. Tasks placed in such + a partition with multiple CPUs should be carefully distributed + and bound to each of the individual CPUs for optimal performance. A partition root ("root" or "isolated") can be in one of the two possible states - valid or invalid. An invalid partition diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 19c8779798fd..1bad4007ff4b 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); @@ -1444,25 +1446,31 @@ static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *x * @new_prs: new partition_root_state * @parent: parent cpuset * @xcpus: exclusive CPUs to be added + * Return: true if isolated_cpus modified, false otherwise * * Remote partition if parent == NULL */ -static void partition_xcpus_add(int new_prs, struct cpuset *parent, +static bool partition_xcpus_add(int new_prs, struct cpuset *parent, struct cpumask *xcpus) { + bool isolcpus_updated; + WARN_ON_ONCE(new_prs < 0); lockdep_assert_held(&callback_lock); if (!parent) parent = &top_cpuset; + if (parent == &top_cpuset) cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus); - if (new_prs != parent->partition_root_state) + isolcpus_updated = (new_prs != parent->partition_root_state); + if (isolcpus_updated) partition_xcpus_newstate(parent->partition_root_state, new_prs, xcpus); cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); + return isolcpus_updated; } /* @@ -1470,12 +1478,15 @@ static void partition_xcpus_add(int new_prs, struct cpuset *parent, * @old_prs: old partition_root_state * @parent: parent cpuset * @xcpus: exclusive CPUs to be removed + * Return: true if isolated_cpus modified, false otherwise * * Remote partition if parent == NULL */ -static void partition_xcpus_del(int old_prs, struct cpuset *parent, +static bool partition_xcpus_del(int old_prs, struct cpuset *parent, struct cpumask *xcpus) { + bool isolcpus_updated; + WARN_ON_ONCE(old_prs < 0); lockdep_assert_held(&callback_lock); if (!parent) @@ -1484,12 +1495,27 @@ static void partition_xcpus_del(int old_prs, struct cpuset *parent, if (parent == &top_cpuset) cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus); - if (old_prs != parent->partition_root_state) + isolcpus_updated = (old_prs != parent->partition_root_state); + if (isolcpus_updated) partition_xcpus_newstate(old_prs, parent->partition_root_state, xcpus); cpumask_and(xcpus, xcpus, cpu_active_mask); cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); + return isolcpus_updated; +} + +static void update_unbound_workqueue_cpumask(bool isolcpus_updated) +{ + int ret; + + lockdep_assert_cpus_held(); + + if (!isolcpus_updated) + return; + + ret = workqueue_unbound_exclude_cpumask(isolated_cpus); + WARN_ON_ONCE(ret < 0); } /* @@ -1540,6 +1566,8 @@ static inline bool is_local_partition(struct cpuset *cs) static int remote_partition_enable(struct cpuset *cs, int new_prs, struct tmpmasks *tmp) { + bool isolcpus_updated; + /* * The user must have sysadmin privilege. */ @@ -1561,7 +1589,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, return 0; spin_lock_irq(&callback_lock); - partition_xcpus_add(new_prs, NULL, tmp->new_cpus); + isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); list_add(&cs->remote_sibling, &remote_children); if (cs->use_parent_ecpus) { struct cpuset *parent = parent_cs(cs); @@ -1570,13 +1598,13 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, parent->child_ecpus_count--; } spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. */ update_tasks_cpumask(&top_cpuset, tmp->new_cpus); update_sibling_cpumasks(&top_cpuset, NULL, tmp); - return 1; } @@ -1591,18 +1619,22 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, */ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) { + bool isolcpus_updated; + compute_effective_exclusive_cpumask(cs, tmp->new_cpus); WARN_ON_ONCE(!is_remote_partition(cs)); WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); spin_lock_irq(&callback_lock); list_del_init(&cs->remote_sibling); - partition_xcpus_del(cs->partition_root_state, NULL, tmp->new_cpus); + isolcpus_updated = partition_xcpus_del(cs->partition_root_state, + NULL, tmp->new_cpus); cs->partition_root_state = -cs->partition_root_state; if (!cs->prs_err) cs->prs_err = PERR_INVCPUS; reset_partition_data(cs); spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1625,6 +1657,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, { bool adding, deleting; int prs = cs->partition_root_state; + int isolcpus_updated = 0; if (WARN_ON_ONCE(!is_remote_partition(cs))) return; @@ -1649,10 +1682,11 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, spin_lock_irq(&callback_lock); if (adding) - partition_xcpus_add(prs, NULL, tmp->addmask); + isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); if (deleting) - partition_xcpus_del(prs, NULL, tmp->delmask); + isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1774,6 +1808,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, int part_error = PERR_NONE; /* Partition error? */ int subparts_delta = 0; struct cpumask *xcpus; /* cs effective_xcpus */ + int isolcpus_updated = 0; bool nocpu; lockdep_assert_held(&cpuset_mutex); @@ -2010,15 +2045,18 @@ write_error: * and vice versa. */ if (adding) - partition_xcpus_del(old_prs, parent, tmp->addmask); + isolcpus_updated += partition_xcpus_del(old_prs, parent, + tmp->addmask); if (deleting) - partition_xcpus_add(new_prs, parent, tmp->delmask); + isolcpus_updated += partition_xcpus_add(new_prs, parent, + tmp->delmask); if (is_partition_valid(parent)) { parent->nr_subparts += subparts_delta; WARN_ON_ONCE(parent->nr_subparts < 0); } spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); if ((old_prs != new_prs) && (cmd == partcmd_update)) update_partition_exclusive(cs, new_prs); @@ -3082,6 +3120,7 @@ out: else if (new_xcpus_state) partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus); spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(new_xcpus_state); /* Force update if switching back to member */ update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0); @@ -4370,6 +4409,30 @@ void cpuset_force_rebuild(void) force_rebuild = true; } +/* + * Attempt to acquire a cpus_read_lock while a hotplug operation may be in + * progress. + * Return: true if successful, false otherwise + * + * To avoid circular lock dependency between cpuset_mutex and cpus_read_lock, + * cpus_read_trylock() is used here to acquire the lock. + */ +static bool cpuset_hotplug_cpus_read_trylock(void) +{ + int retries = 0; + + while (!cpus_read_trylock()) { + /* + * CPU hotplug still in progress. Retry 5 times + * with a 10ms wait before bailing out. + */ + if (++retries > 5) + return false; + msleep(10); + } + return true; +} + /** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest @@ -4386,6 +4449,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) bool cpus_updated; bool mems_updated; bool remote; + int partcmd = -1; struct cpuset *parent; retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -4417,11 +4481,13 @@ retry: compute_partition_effective_cpumask(cs, &new_cpus); if (remote && cpumask_empty(&new_cpus) && - partition_is_populated(cs, NULL)) { + partition_is_populated(cs, NULL) && + cpuset_hotplug_cpus_read_trylock()) { remote_partition_disable(cs, tmp); compute_effective_cpumask(&new_cpus, cs, parent); remote = false; cpuset_force_rebuild(); + cpus_read_unlock(); } /* @@ -4432,18 +4498,28 @@ retry: * partitions. */ if (is_local_partition(cs) && (!is_partition_valid(parent) || - tasks_nocpu_error(parent, cs, &new_cpus))) { - update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, tmp); - compute_effective_cpumask(&new_cpus, cs, parent); - cpuset_force_rebuild(); - } + tasks_nocpu_error(parent, cs, &new_cpus))) + partcmd = partcmd_invalidate; /* * On the other hand, an invalid partition root may be transitioned * back to a regular one. */ - else if (is_partition_valid(parent) && is_partition_invalid(cs)) { - update_parent_effective_cpumask(cs, partcmd_update, NULL, tmp); - if (is_partition_valid(cs)) { + else if (is_partition_valid(parent) && is_partition_invalid(cs)) + partcmd = partcmd_update; + + /* + * cpus_read_lock needs to be held before calling + * update_parent_effective_cpumask(). To avoid circular lock + * dependency between cpuset_mutex and cpus_read_lock, + * cpus_read_trylock() is used here to acquire the lock. + */ + if (partcmd >= 0) { + if (!cpuset_hotplug_cpus_read_trylock()) + goto update_tasks; + + update_parent_effective_cpumask(cs, partcmd, NULL, tmp); + cpus_read_unlock(); + if ((partcmd == partcmd_invalidate) || is_partition_valid(cs)) { compute_partition_effective_cpumask(cs, &new_cpus); cpuset_force_rebuild(); } diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 2b825019f806..7b7c4c2b6d85 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -232,11 +232,11 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ A1:P0,A2:P1,A3:P2,B1:P1 2-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4" + A1:P0,A2:P2,A3:P1 2-4,2-3" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ A1:P0,A2:P-2,A3:P-1" @@ -248,7 +248,7 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," # An invalidated remote partition cannot self-recover from hotplug " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" @@ -376,7 +376,7 @@ write_cpu_online() } fi echo $VAL > $CPUFILE - pause 0.01 + pause 0.05 } # @@ -508,12 +508,14 @@ dump_states() XECPUS=$DIR/cpuset.cpus.exclusive.effective PRS=$DIR/cpuset.cpus.partition PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions + ISCPUS=$DIR/.__DEBUG__.cpuset.cpus.isolated [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)" [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)" [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)" [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)" [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)" + [[ -e $ISCPUS ]] && echo "$ISCPUS: $(cat $ISCPUS)" done } @@ -591,11 +593,17 @@ check_cgroup_states() # # Get isolated (including offline) CPUs by looking at -# /sys/kernel/debug/sched/domains and compare that with the expected value. +# /sys/kernel/debug/sched/domains and *cpuset.cpus.isolated control file, +# if available, and compare that with the expected value. # -# Note that a sched domain of just 1 CPU will be considered isolated. +# Note that isolated CPUs from the sched/domains context include offline +# CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may +# not be included in the *cpuset.cpus.isolated control file which contains +# only CPUs in isolated partitions. # -# $1 - expected isolated cpu list +# $1 - expected isolated cpu list(s) {,} +# - expected sched/domains value +# - *cpuset.cpus.isolated value = if not defined # check_isolcpus() { @@ -603,8 +611,38 @@ check_isolcpus() ISOLCPUS= LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains + ISCPUS=${CGROUP2}/.__DEBUG__.cpuset.cpus.isolated + if [[ $EXPECT_VAL = . ]] + then + EXPECT_VAL= + EXPECT_VAL2= + elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + then + set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") + EXPECT_VAL=$1 + EXPECT_VAL2=$2 + else + EXPECT_VAL2=$EXPECT_VAL + fi + + # + # Check the debug isolated cpumask, if present + # + [[ -f $ISCPUS ]] && { + ISOLCPUS=$(cat $ISCPUS) + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + # Take a 50ms pause and try again + pause 0.05 + ISOLCPUS=$(cat $ISCPUS) + } + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + ISOLCPUS= + } + + # + # Use the sched domain in debugfs to check isolated CPUs, if available + # [[ -d $SCHED_DOMAINS ]] || return 0 - [[ $EXPECT_VAL = . ]] && EXPECT_VAL= for ((CPU=0; CPU < $NR_CPUS; CPU++)) do @@ -648,6 +686,22 @@ test_fail() exit 1 } +# +# Check to see if there are unexpected isolated CPUs left +# +null_isolcpus_check() +{ + [[ $VERBOSE -gt 0 ]] || return 0 + pause 0.02 + check_isolcpus "." + if [[ $? -ne 0 ]] + then + echo "Unexpected isolated CPUs: $ISOLCPUS" + dump_states + exit 1 + fi +} + # # Run cpuset state transition test # $1 - test matrix name @@ -733,6 +787,7 @@ run_state_test() echo "Effective cpus changed to $NEWLIST after test $I!" exit 1 } + null_isolcpus_check [[ $VERBOSE -gt 0 ]] && echo "Test $I done." ((I++)) done @@ -802,6 +857,7 @@ test_isolated() console_msg "Cleaning up" echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 + null_isolcpus_check } # -- cgit v1.2.3 From 877c737db9355acaa1ec2fd2b8dbdaff82605df7 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 27 Nov 2023 14:51:05 -0500 Subject: cgroup/cpuset: Expose cpuset.cpus.isolated The root-only cpuset.cpus.isolated control file shows the current set of isolated CPUs in isolated partitions. This control file is currently exposed only with the cgroup_debug boot command line option which also adds the ".__DEBUG__." prefix. This is actually a useful control file if users want to find out which CPUs are currently in an isolated state by the cpuset controller. Remove CFTYPE_DEBUG flag for this control file and make it available by default without any prefix. The test_cpuset_prs.sh test script and the cgroup-v2.rst documentation file are also updated accordingly. Minor code change is also made in test_cpuset_prs.sh to avoid false test failure when running on debug kernel. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 7 +++++ kernel/cgroup/cpuset.c | 2 +- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 32 +++++++++++++---------- 3 files changed, 26 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index cf5651a11df8..30f6ff2eba47 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2316,6 +2316,13 @@ Cpuset Interface Files treated to have an implicit value of "cpuset.cpus" in the formation of local partition. + cpuset.cpus.isolated + A read-only and root cgroup only multiple values file. + + This file shows the set of all isolated CPUs used in existing + isolated partitions. It will be empty if no isolated partition + is created. + cpuset.cpus.partition A read-write single value file which exists on non-root cpuset-enabled cgroups. This flag is owned by the parent cgroup diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 1bad4007ff4b..2a16df86c55c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3974,7 +3974,7 @@ static struct cftype dfl_files[] = { .name = "cpus.isolated", .seq_show = cpuset_common_seq_show, .private = FILE_ISOLATED_CPULIST, - .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG, + .flags = CFTYPE_ONLY_ON_ROOT, }, { } /* terminate */ diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 7b7c4c2b6d85..b5eb1be2248c 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -508,7 +508,7 @@ dump_states() XECPUS=$DIR/cpuset.cpus.exclusive.effective PRS=$DIR/cpuset.cpus.partition PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions - ISCPUS=$DIR/.__DEBUG__.cpuset.cpus.isolated + ISCPUS=$DIR/cpuset.cpus.isolated [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)" [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)" [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" @@ -593,17 +593,17 @@ check_cgroup_states() # # Get isolated (including offline) CPUs by looking at -# /sys/kernel/debug/sched/domains and *cpuset.cpus.isolated control file, +# /sys/kernel/debug/sched/domains and cpuset.cpus.isolated control file, # if available, and compare that with the expected value. # # Note that isolated CPUs from the sched/domains context include offline # CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may -# not be included in the *cpuset.cpus.isolated control file which contains +# not be included in the cpuset.cpus.isolated control file which contains # only CPUs in isolated partitions. # # $1 - expected isolated cpu list(s) {,} # - expected sched/domains value -# - *cpuset.cpus.isolated value = if not defined +# - cpuset.cpus.isolated value = if not defined # check_isolcpus() { @@ -611,7 +611,7 @@ check_isolcpus() ISOLCPUS= LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/.__DEBUG__.cpuset.cpus.isolated + ISCPUS=${CGROUP2}/cpuset.cpus.isolated if [[ $EXPECT_VAL = . ]] then EXPECT_VAL= @@ -692,14 +692,18 @@ test_fail() null_isolcpus_check() { [[ $VERBOSE -gt 0 ]] || return 0 - pause 0.02 - check_isolcpus "." - if [[ $? -ne 0 ]] - then - echo "Unexpected isolated CPUs: $ISOLCPUS" - dump_states - exit 1 - fi + # Retry a few times before printing error + RETRY=0 + while [[ $RETRY -lt 5 ]] + do + pause 0.01 + check_isolcpus "." + [[ $? -eq 0 ]] && return 0 + ((RETRY++)) + done + echo "Unexpected isolated CPUs: $ISOLCPUS" + dump_states + exit 1 } # @@ -776,7 +780,7 @@ run_state_test() # NEWLIST=$(cat cpuset.cpus.effective) RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 5 ]] + while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] do # Wait a bit longer & recheck a few times pause 0.01 -- cgit v1.2.3