From f371f2dc53d107af25171f29c852a3908ee0afb6 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Mon, 13 Feb 2023 09:15:19 +0000 Subject: selftest/bpf/benchs: Add benchmark for hashmap lookups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new benchmark which measures hashmap lookup operations speed. A user can control the following parameters of the benchmark: * key_size (max 1024): the key size to use * max_entries: the hashmap max entries * nr_entries: the number of entries to insert/lookup * nr_loops: the number of loops for the benchmark * map_flags The hashmap flags passed to BPF_MAP_CREATE The BPF program performing the benchmarks calls two nested bpf_loop: bpf_loop(nr_loops/nr_entries) bpf_loop(nr_entries) bpf_map_lookup() So the nr_loops determines the number of actual map lookups. All lookups are successful. Example (the output is generated on a AMD Ryzen 9 3950X machine): for nr_entries in `seq 4096 4096 65536`; do echo -n "$((nr_entries*100/65536))% full: "; sudo ./bench -d2 -a bpf-hashmap-lookup --key_size=4 --nr_entries=$nr_entries --max_entries=65536 --nr_loops=1000000 --map_flags=0x40 | grep cpu; done 6% full: cpu01: lookup 50.739M ± 0.018M events/sec (approximated from 32 samples of ~19ms) 12% full: cpu01: lookup 47.751M ± 0.015M events/sec (approximated from 32 samples of ~20ms) 18% full: cpu01: lookup 45.153M ± 0.013M events/sec (approximated from 32 samples of ~22ms) 25% full: cpu01: lookup 43.826M ± 0.014M events/sec (approximated from 32 samples of ~22ms) 31% full: cpu01: lookup 41.971M ± 0.012M events/sec (approximated from 32 samples of ~23ms) 37% full: cpu01: lookup 41.034M ± 0.015M events/sec (approximated from 32 samples of ~24ms) 43% full: cpu01: lookup 39.946M ± 0.012M events/sec (approximated from 32 samples of ~25ms) 50% full: cpu01: lookup 38.256M ± 0.014M events/sec (approximated from 32 samples of ~26ms) 56% full: cpu01: lookup 36.580M ± 0.018M events/sec (approximated from 32 samples of ~27ms) 62% full: cpu01: lookup 36.252M ± 0.012M events/sec (approximated from 32 samples of ~27ms) 68% full: cpu01: lookup 35.200M ± 0.012M events/sec (approximated from 32 samples of ~28ms) 75% full: cpu01: lookup 34.061M ± 0.009M events/sec (approximated from 32 samples of ~29ms) 81% full: cpu01: lookup 34.374M ± 0.010M events/sec (approximated from 32 samples of ~29ms) 87% full: cpu01: lookup 33.244M ± 0.011M events/sec (approximated from 32 samples of ~30ms) 93% full: cpu01: lookup 32.182M ± 0.013M events/sec (approximated from 32 samples of ~31ms) 100% full: cpu01: lookup 31.497M ± 0.016M events/sec (approximated from 32 samples of ~31ms) Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230213091519.1202813-8-aspsk@isovalent.com --- .../selftests/bpf/progs/bpf_hashmap_lookup.c | 63 ++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c (limited to 'tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c') diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c new file mode 100644 index 000000000000..1eb74ddca414 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include "vmlinux.h" + +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); +} hash_map_bench SEC(".maps"); + +/* The number of slots to store times */ +#define NR_SLOTS 32 +#define NR_CPUS 256 +#define CPU_MASK (NR_CPUS-1) + +/* Configured by userspace */ +u64 nr_entries; +u64 nr_loops; +u32 __attribute__((__aligned__(8))) key[NR_CPUS]; + +/* Filled by us */ +u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS]; +u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS]; + +static inline void patch_key(u32 i) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + key[0] = i + 1; +#else + key[0] = __builtin_bswap32(i + 1); +#endif + /* the rest of key is random and is configured by userspace */ +} + +static int lookup_callback(__u32 index, u32 *unused) +{ + patch_key(index); + return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1; +} + +static int loop_lookup_callback(__u32 index, u32 *unused) +{ + return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int benchmark(void *ctx) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 times_index; + u64 start_time; + + times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS; + start_time = bpf_ktime_get_ns(); + bpf_loop(nr_loops, loop_lookup_callback, NULL, 0); + percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time; + percpu_times_index[cpu & CPU_MASK] += 1; + return 0; +} -- cgit v1.2.3