summaryrefslogtreecommitdiff
path: root/drivers/base/cacheinfo.c
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2024-01-26 11:19:44 +0300
committerAndrew Morton <akpm@linux-foundation.org>2024-02-22 21:24:41 +0300
commit5cec4eb7fad6fb1e9a3dd8403b558d1eff7490ff (patch)
tree4c0221b586d5d937922a89ee0cb7c1c4b076192d /drivers/base/cacheinfo.c
parent96200c915040c2ec6f6c173a29caf9f7c9c15a7e (diff)
downloadlinux-5cec4eb7fad6fb1e9a3dd8403b558d1eff7490ff.tar.xz
mm and cache_info: remove unnecessary CPU cache info update
For each CPU hotplug event, we will update per-CPU data slice size and corresponding PCP configuration for every online CPU to make the implementation simple. But, Kyle reported that this takes tens seconds during boot on a machine with 34 zones and 3840 CPUs. So, in this patch, for each CPU hotplug event, we only update per-CPU data slice size and corresponding PCP configuration for the CPUs that share caches with the hotplugged CPU. With the patch, the system boot time reduces 67 seconds on the machine. Link: https://lkml.kernel.org/r/20240126081944.414520-1-ying.huang@intel.com Fixes: 362d37a106dd ("mm, pcp: reduce lock contention for draining high-order pages") Signed-off-by: "Huang, Ying" <ying.huang@intel.com> Originally-by: Kyle Meyer <kyle.meyer@hpe.com> Reported-and-tested-by: Kyle Meyer <kyle.meyer@hpe.com> Cc: Sudeep Holla <sudeep.holla@arm.com> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'drivers/base/cacheinfo.c')
-rw-r--r--drivers/base/cacheinfo.c50
1 files changed, 44 insertions, 6 deletions
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index f1e79263fe61..23b8cba4a2a3 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -898,6 +898,37 @@ err:
return rc;
}
+static unsigned int cpu_map_shared_cache(bool online, unsigned int cpu,
+ cpumask_t **map)
+{
+ struct cacheinfo *llc, *sib_llc;
+ unsigned int sibling;
+
+ if (!last_level_cache_is_valid(cpu))
+ return 0;
+
+ llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+ if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
+ return 0;
+
+ if (online) {
+ *map = &llc->shared_cpu_map;
+ return cpumask_weight(*map);
+ }
+
+ /* shared_cpu_map of offlined CPU will be cleared, so use sibling map */
+ for_each_cpu(sibling, &llc->shared_cpu_map) {
+ if (sibling == cpu || !last_level_cache_is_valid(sibling))
+ continue;
+ sib_llc = per_cpu_cacheinfo_idx(sibling, cache_leaves(sibling) - 1);
+ *map = &sib_llc->shared_cpu_map;
+ return cpumask_weight(*map);
+ }
+
+ return 0;
+}
+
/*
* Calculate the size of the per-CPU data cache slice. This can be
* used to estimate the size of the data cache slice that can be used
@@ -929,28 +960,31 @@ static void update_per_cpu_data_slice_size_cpu(unsigned int cpu)
ci->per_cpu_data_slice_size = llc->size / nr_shared;
}
-static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
+static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu,
+ cpumask_t *cpu_map)
{
unsigned int icpu;
- for_each_online_cpu(icpu) {
+ for_each_cpu(icpu, cpu_map) {
if (!cpu_online && icpu == cpu)
continue;
update_per_cpu_data_slice_size_cpu(icpu);
+ setup_pcp_cacheinfo(icpu);
}
}
static int cacheinfo_cpu_online(unsigned int cpu)
{
int rc = detect_cache_attributes(cpu);
+ cpumask_t *cpu_map;
if (rc)
return rc;
rc = cache_add_dev(cpu);
if (rc)
goto err;
- update_per_cpu_data_slice_size(true, cpu);
- setup_pcp_cacheinfo();
+ if (cpu_map_shared_cache(true, cpu, &cpu_map))
+ update_per_cpu_data_slice_size(true, cpu, cpu_map);
return 0;
err:
free_cache_attributes(cpu);
@@ -959,12 +993,16 @@ err:
static int cacheinfo_cpu_pre_down(unsigned int cpu)
{
+ cpumask_t *cpu_map;
+ unsigned int nr_shared;
+
+ nr_shared = cpu_map_shared_cache(false, cpu, &cpu_map);
if (cpumask_test_and_clear_cpu(cpu, &cache_dev_map))
cpu_cache_sysfs_exit(cpu);
free_cache_attributes(cpu);
- update_per_cpu_data_slice_size(false, cpu);
- setup_pcp_cacheinfo();
+ if (nr_shared > 1)
+ update_per_cpu_data_slice_size(false, cpu, cpu_map);
return 0;
}