summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/resctrl/rdtgroup.c
diff options
context:
space:
mode:
authorJames Morse <james.morse@arm.com>2024-02-13 21:44:38 +0300
committerBorislav Petkov (AMD) <bp@alien8.de>2024-02-19 21:28:07 +0300
commitfb700810d30b9eb333a7bf447012e1158e35c62f (patch)
tree1aea217e28a33705314379059f3ab59edd8c43a6 /arch/x86/kernel/cpu/resctrl/rdtgroup.c
parenteeff1d4f118bdf0870227fee5a770f03056e3adc (diff)
downloadlinux-fb700810d30b9eb333a7bf447012e1158e35c62f.tar.xz
x86/resctrl: Separate arch and fs resctrl locks
resctrl has one mutex that is taken by the architecture-specific code, and the filesystem parts. The two interact via cpuhp, where the architecture code updates the domain list. Filesystem handlers that walk the domains list should not run concurrently with the cpuhp callback modifying the list. Exposing a lock from the filesystem code means the interface is not cleanly defined, and creates the possibility of cross-architecture lock ordering headaches. The interaction only exists so that certain filesystem paths are serialised against CPU hotplug. The CPU hotplug code already has a mechanism to do this using cpus_read_lock(). MPAM's monitors have an overflow interrupt, so it needs to be possible to walk the domains list in irq context. RCU is ideal for this, but some paths need to be able to sleep to allocate memory. Because resctrl_{on,off}line_cpu() take the rdtgroup_mutex as part of a cpuhp callback, cpus_read_lock() must always be taken first. rdtgroup_schemata_write() already does this. Most of the filesystem code's domain list walkers are currently protected by the rdtgroup_mutex taken in rdtgroup_kn_lock_live(). The exceptions are rdt_bit_usage_show() and the mon_config helpers which take the lock directly. Make the domain list protected by RCU. An architecture-specific lock prevents concurrent writers. rdt_bit_usage_show() could walk the domain list using RCU, but to keep all the filesystem operations the same, this is changed to call cpus_read_lock(). The mon_config helpers send multiple IPIs, take the cpus_read_lock() in these cases. The other filesystem list walkers need to be able to sleep. Add cpus_read_lock() to rdtgroup_kn_lock_live() so that the cpuhp callbacks can't be invoked when file system operations are occurring. Add lockdep_assert_cpus_held() in the cases where the rdtgroup_kn_lock_live() call isn't obvious. Resctrl's domain online/offline calls now need to take the rdtgroup_mutex themselves. [ bp: Fold in a build fix: https://lore.kernel.org/r/87zfvwieli.ffs@tglx ] Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Reviewed-by: Babu Moger <babu.moger@amd.com> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Babu Moger <babu.moger@amd.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64 Link: https://lore.kernel.org/r/20240213184438.16675-25-james.morse@arm.com Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl/rdtgroup.c')
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c68
1 files changed, 54 insertions, 14 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 777e9f680332..011e17efb1a6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -35,6 +35,10 @@
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);
@@ -1014,6 +1018,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
bool sep = false;
u32 ctrl_val;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
list_for_each_entry(dom, &r->domains, list) {
@@ -1074,6 +1079,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
}
seq_putc(seq, '\n');
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return 0;
}
@@ -1329,6 +1335,9 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
struct rdt_domain *d;
u32 ctrl;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
@@ -1593,6 +1602,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
struct rdt_domain *dom;
bool sep = false;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
list_for_each_entry(dom, &r->domains, list) {
@@ -1609,6 +1619,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
seq_puts(s, "\n");
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return 0;
}
@@ -1690,6 +1701,9 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
unsigned long dom_id, val;
struct rdt_domain *d;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
next:
if (!tok || tok[0] == '\0')
return 0;
@@ -1736,6 +1750,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
if (nbytes == 0 || buf[nbytes - 1] != '\n')
return -EINVAL;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
rdt_last_cmd_clear();
@@ -1745,6 +1760,7 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return ret ?: nbytes;
}
@@ -1760,6 +1776,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
if (nbytes == 0 || buf[nbytes - 1] != '\n')
return -EINVAL;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
rdt_last_cmd_clear();
@@ -1769,6 +1786,7 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return ret ?: nbytes;
}
@@ -2245,6 +2263,9 @@ static int set_cache_qos_cfg(int level, bool enable)
struct rdt_domain *d;
int cpu;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (level == RDT_RESOURCE_L3)
update = l3_qos_cfg_update;
else if (level == RDT_RESOURCE_L2)
@@ -2444,6 +2465,7 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
rdtgroup_kn_get(rdtgrp, kn);
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
/* Was this group deleted while we waited? */
@@ -2461,6 +2483,8 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
return;
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
rdtgroup_kn_put(rdtgrp, kn);
}
@@ -2793,6 +2817,9 @@ static int reset_all_ctrls(struct rdt_resource *r)
struct rdt_domain *d;
int i;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
@@ -3077,6 +3104,9 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_domain *dom;
int ret;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
list_for_each_entry(dom, &r->domains, list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
@@ -3907,13 +3937,13 @@ static void domain_destroy_mon_state(struct rdt_domain *d)
void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
{
- lockdep_assert_held(&rdtgroup_mutex);
+ mutex_lock(&rdtgroup_mutex);
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
mba_sc_domain_destroy(r, d);
if (!r->mon_capable)
- return;
+ goto out_unlock;
/*
* If resctrl is mounted, remove all the
@@ -3938,6 +3968,9 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
}
domain_destroy_mon_state(d);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
}
static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
@@ -3973,20 +4006,22 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
{
- int err;
+ int err = 0;
- lockdep_assert_held(&rdtgroup_mutex);
+ mutex_lock(&rdtgroup_mutex);
- if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
+ if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
/* RDT_RESOURCE_MBA is never mon_capable */
- return mba_sc_domain_allocate(r, d);
+ err = mba_sc_domain_allocate(r, d);
+ goto out_unlock;
+ }
if (!r->mon_capable)
- return 0;
+ goto out_unlock;
err = domain_setup_mon_state(r, d);
if (err)
- return err;
+ goto out_unlock;
if (is_mbm_enabled()) {
INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
@@ -4006,15 +4041,18 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
if (resctrl_mounted && resctrl_arch_mon_capable())
mkdir_mondata_subdir_allrdtgrp(r, d);
- return 0;
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
}
void resctrl_online_cpu(unsigned int cpu)
{
- lockdep_assert_held(&rdtgroup_mutex);
-
+ mutex_lock(&rdtgroup_mutex);
/* The CPU is set in default rdtgroup after online. */
cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+ mutex_unlock(&rdtgroup_mutex);
}
static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
@@ -4033,8 +4071,7 @@ void resctrl_offline_cpu(unsigned int cpu)
struct rdtgroup *rdtgrp;
struct rdt_domain *d;
- lockdep_assert_held(&rdtgroup_mutex);
-
+ mutex_lock(&rdtgroup_mutex);
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
clear_childcpus(rdtgrp, cpu);
@@ -4043,7 +4080,7 @@ void resctrl_offline_cpu(unsigned int cpu)
}
if (!l3->mon_capable)
- return;
+ goto out_unlock;
d = get_domain_from_cpu(cpu, l3);
if (d) {
@@ -4057,6 +4094,9 @@ void resctrl_offline_cpu(unsigned int cpu)
cqm_setup_limbo_handler(d, 0, cpu);
}
}
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
}
/*