author    Andrii Nakryiko <andrii@kernel.org>    2024-04-25 00:52:14 +0300
committer Masami Hiramatsu (Google) <mhiramat@kernel.org>    2024-05-01 17:18:48 +0300
commit    78d0b16127daa26d016c215a089ae330878291f7 (patch)
tree      d9fef1a56441ea32fd2e5cae6d6fb39b71be0350 /lib
parent    a3b00f10da808bd4a354f890b551cba471082d0e (diff)
download  linux-78d0b16127daa26d016c215a089ae330878291f7.tar.xz
objpool: cache nr_possible_cpus() and avoid caching nr_cpu_ids
Profiling shows that calling nr_possible_cpus() in objpool_pop() takes
a noticeable amount of CPU (when profiled on an 80-core machine), as we
need to recalculate the number of set bits in a CPU bit mask. This
number can't change, so there is no point in paying the price of
recalculating it. As such, cache this value in struct objpool_head and
use it in objpool_pop().

On the other hand, the cached pool->nr_cpus isn't necessary, as it's
not used in the hot path and is also a pretty trivial value to
retrieve. So drop pool->nr_cpus in favor of using nr_cpu_ids
everywhere. This way the size of struct objpool_head remains the same,
which is a nice bonus.

The same BPF selftests benchmarks were used to evaluate the effect.
Using the changes in the previous patch (inlining of
objpool_pop/objpool_push) as a baseline, here are the differences:

BASELINE
========
kretprobe      :    9.937 ± 0.174M/s
kretprobe-multi:   10.440 ± 0.108M/s

AFTER
=====
kretprobe      :   10.106 ± 0.120M/s (+1.7%)
kretprobe-multi:   10.515 ± 0.180M/s (+0.7%)

Link: https://lore.kernel.org/all/20240424215214.3956041-3-andrii@kernel.org/

Cc: Matt (Qiang) Wu <wuqiang.matt@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
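For context, the hot-path win comes from the header side of this commit
(include/linux/objpool.h, not shown here since this view is limited to
'lib'). Below is a small userspace sketch, not kernel code, of the
pattern being applied: num_possible_cpus() amounts to a bitmap-weight
walk over cpu_possible_mask, so the patch pays that cost once at init
and reads a cached int in the pop path. All names and the mask layout
here are illustrative.

    /*
     * Userspace illustration (hypothetical, not the kernel's objpool code)
     * of caching a bit-count instead of recomputing it per call.
     */
    #include <stdio.h>

    #define NR_CPU_WORDS 2

    /* Stand-in for cpu_possible_mask: 80 CPUs set (on a 64-bit build). */
    static const unsigned long cpu_possible_mask[NR_CPU_WORDS] = {
    	~0UL, (1UL << 16) - 1
    };

    /* Emulates num_possible_cpus(): a bitmap weight, recomputed per call. */
    static int num_possible_cpus_emul(void)
    {
    	int i, weight = 0;

    	for (i = 0; i < NR_CPU_WORDS; i++)
    		weight += __builtin_popcountl(cpu_possible_mask[i]);
    	return weight;
    }

    /* Mirrors the patch: the count lives in the pool header, as an int. */
    struct pool_head {
    	int nr_possible_cpus;
    };

    int main(void)
    {
    	struct pool_head pool;

    	/* "objpool_init()": pay for the bitmap walk exactly once. */
    	pool.nr_possible_cpus = num_possible_cpus_emul();

    	/* "objpool_pop()" hot path: a plain load, no popcount loop. */
    	printf("possible CPUs (cached): %d\n", pool.nr_possible_cpus);
    	return 0;
    }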
Diffstat (limited to 'lib')
-rw-r--r--  lib/objpool.c  12
1 file changed, 6 insertions, 6 deletions
diff --git a/lib/objpool.c b/lib/objpool.c
index f696308fc026..234f9d0bd081 100644
--- a/lib/objpool.c
+++ b/lib/objpool.c
@@ -50,7 +50,7 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
 {
 	int i, cpu_count = 0;
 
-	for (i = 0; i < pool->nr_cpus; i++) {
+	for (i = 0; i < nr_cpu_ids; i++) {
 
 		struct objpool_slot *slot;
 		int nodes, size, rc;
@@ -60,8 +60,8 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
 			continue;
 
 		/* compute how many objects to be allocated with this slot */
-		nodes = nr_objs / num_possible_cpus();
-		if (cpu_count < (nr_objs % num_possible_cpus()))
+		nodes = nr_objs / pool->nr_possible_cpus;
+		if (cpu_count < (nr_objs % pool->nr_possible_cpus))
 			nodes++;
 
 		cpu_count++;
@@ -103,7 +103,7 @@ static void objpool_fini_percpu_slots(struct objpool_head *pool)
 	if (!pool->cpu_slots)
 		return;
 
-	for (i = 0; i < pool->nr_cpus; i++)
+	for (i = 0; i < nr_cpu_ids; i++)
 		kvfree(pool->cpu_slots[i]);
 	kfree(pool->cpu_slots);
 }
@@ -130,13 +130,13 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
 
 	/* initialize objpool pool */
 	memset(pool, 0, sizeof(struct objpool_head));
-	pool->nr_cpus = nr_cpu_ids;
+	pool->nr_possible_cpus = num_possible_cpus();
 	pool->obj_size = object_size;
 	pool->capacity = capacity;
 	pool->gfp = gfp & ~__GFP_ZERO;
 	pool->context = context;
 	pool->release = release;
-	slot_size = pool->nr_cpus * sizeof(struct objpool_slot);
+	slot_size = nr_cpu_ids * sizeof(struct objpool_slot);
 	pool->cpu_slots = kzalloc(slot_size, pool->gfp);
 	if (!pool->cpu_slots)
 		return -ENOMEM;
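One detail worth spelling out from the commit message: pool->nr_cpus and
the new pool->nr_possible_cpus are both plain ints, which is why
sizeof(struct objpool_head) stays the same. A minimal compile-time check
of that reasoning (hypothetical before/after layouts, not the kernel's
actual struct definition):

    /* Hypothetical layouts; only the renamed int field differs. */
    #include <assert.h>

    struct objpool_head_before { int nr_cpus;          int obj_size; };
    struct objpool_head_after  { int nr_possible_cpus; int obj_size; };

    static_assert(sizeof(struct objpool_head_before) ==
    	      sizeof(struct objpool_head_after),
    	      "swapping one int for another leaves the size unchanged");

    int main(void) { return 0; }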