path: root/samples/bpf/map_perf_test_user.c
author    David Herrmann <dh.herrmann@gmail.com>  2017-01-21 19:26:13 +0300
committer David S. Miller <davem@davemloft.net>   2017-01-24 00:10:38 +0300
commit    b8a943e2942296aad37a8e7adc43db493413e54b (patch)
tree      60d9d1e583a232e729f965670b2e21dd14a6bc84 /samples/bpf/map_perf_test_user.c
parent    4d3381f5a322dd5db2477e224821790478488173 (diff)
samples/bpf: add lpm-trie benchmark
Extend the map_perf_test_{user,kern}.c infrastructure to stress-test
lpm-trie lookups. We hook into the kprobe on sys_gettid() and measure
the latency depending on trie size and lookup count.

On my Intel Haswell i7-6400U, a single gettid() syscall with an empty
bpf program takes roughly 6.5us. Lookups in empty tries take ~1.8us on
the first try and ~0.9us on retries. Lookups in tries with 8192 entries
take ~7.1us (on the first _and_ any subsequent try).

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Reviewed-by: Daniel Mack <daniel@zonque.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
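The kernel-side half of the benchmark lives in map_perf_test_kern.c and
is outside this page's diffstat. As a rough sketch of what such a
program looks like (the map name, key width, and loop count here are
illustrative, not taken from this diff), it attaches to the gettid
kprobe and performs repeated trie lookups, which is what the latency
numbers above measure:

/* Sketch of a kernel-side LPM stress program in the samples/bpf style
 * of this era; names and constants are illustrative.
 */
struct bpf_map_def SEC("maps") lpm_trie_map_alloc = {
	.type = BPF_MAP_TYPE_LPM_TRIE,
	.key_size = 8,			/* 4-byte prefixlen + 4 data bytes */
	.value_size = sizeof(long),
	.max_entries = 10000,
	.map_flags = BPF_F_NO_PREALLOC,	/* LPM tries require no-prealloc */
};

SEC("kprobe/sys_gettid")
int stress_lpm_trie_map_alloc(struct pt_regs *ctx)
{
	union {
		u32 b32[2];
		u8 b8[8];
	} key;
	unsigned int i;

	key.b32[0] = 32;	/* prefixlen */
	key.b8[4] = 192;	/* look up 192.168.0.1/32 */
	key.b8[5] = 168;
	key.b8[6] = 0;
	key.b8[7] = 1;

#pragma clang loop unroll(full)
	for (i = 0; i < 32; ++i)
		bpf_map_lookup_elem(&lpm_trie_map_alloc, &key);

	return 0;
}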
Diffstat (limited to 'samples/bpf/map_perf_test_user.c')
-rw-r--r--  samples/bpf/map_perf_test_user.c | 49
1 file changed, 49 insertions(+), 0 deletions(-)
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 9505b4d112f4..680260a91f50 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -37,6 +37,7 @@ static __u64 time_get_ns(void)
#define PERCPU_HASH_KMALLOC (1 << 3)
#define LRU_HASH_PREALLOC (1 << 4)
#define PERCPU_LRU_HASH_PREALLOC (1 << 5)
+#define LPM_KMALLOC (1 << 6)
static int test_flags = ~0;
@@ -112,6 +113,18 @@ static void test_percpu_hash_kmalloc(int cpu)
cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
}
+static void test_lpm_kmalloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_gettid);
+ printf("%d:lpm_perf kmalloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
static void loop(int cpu)
{
cpu_set_t cpuset;
@@ -137,6 +150,9 @@ static void loop(int cpu)
if (test_flags & PERCPU_LRU_HASH_PREALLOC)
test_percpu_lru_hash_prealloc(cpu);
+
+ if (test_flags & LPM_KMALLOC)
+ test_lpm_kmalloc(cpu);
}
static void run_perf_test(int tasks)
@@ -162,6 +178,37 @@ static void run_perf_test(int tasks)
}
}
+static void fill_lpm_trie(void)
+{
+ struct bpf_lpm_trie_key *key;
+ unsigned long value = 0;
+ unsigned int i;
+ int r;
+
+ key = alloca(sizeof(*key) + 4);
+ key->prefixlen = 32;
+
+ for (i = 0; i < 512; ++i) {
+ key->prefixlen = rand() % 33;
+ key->data[0] = rand() & 0xff;
+ key->data[1] = rand() & 0xff;
+ key->data[2] = rand() & 0xff;
+ key->data[3] = rand() & 0xff;
+ r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ assert(!r);
+ }
+
+ key->prefixlen = 32;
+ key->data[0] = 192;
+ key->data[1] = 168;
+ key->data[2] = 0;
+ key->data[3] = 1;
+ value = 128;
+
+ r = bpf_map_update_elem(map_fd[6], key, &value, 0);
+ assert(!r);
+}
+
int main(int argc, char **argv)
{
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -182,6 +229,8 @@ int main(int argc, char **argv)
return 1;
}
+ fill_lpm_trie();
+
run_perf_test(num_cpu);
return 0;
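For reference, a user-space lookup against the trie uses the same key
layout that fill_lpm_trie() builds: a 4-byte prefixlen followed by the
address bytes in network order. A minimal sketch, assuming map_fd[6]
still indexes the LPM trie and that bpf_map_lookup_elem() is available
alongside the bpf_map_update_elem() used above; lookup_lpm_example()
itself is illustrative and not part of the sample:

/* Sketch: longest-prefix-match lookup from user space against the
 * trie populated by fill_lpm_trie().
 */
static void lookup_lpm_example(void)
{
	struct bpf_lpm_trie_key *key;
	unsigned long value = 0;

	key = alloca(sizeof(*key) + 4);
	key->prefixlen = 32;	/* match against the full 32-bit address */
	key->data[0] = 192;	/* 192.168.0.1, inserted above with value 128 */
	key->data[1] = 168;
	key->data[2] = 0;
	key->data[3] = 1;

	/* Returns the value of the longest matching prefix; at minimum
	 * the /32 entry added at the end of fill_lpm_trie() matches.
	 */
	assert(!bpf_map_lookup_elem(map_fd[6], key, &value));
	printf("lpm lookup: value = %lu\n", value);
}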