summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin KaFai Lau <martin.lau@kernel.org>2024-03-01 02:23:12 +0300
committerMartin KaFai Lau <martin.lau@kernel.org>2024-03-05 01:09:24 +0300
commit8f50d5c423551bfa259af792647a2f4799780ac5 (patch)
treec3e977ac6a70cf40b315f9d84c42ad51b5d35799
parent01031fd473059bf69bb6edc6d51d4bd58ad92e50 (diff)
parent93bc28d859e57f1a654d3b63600d14c85c5630a4 (diff)
downloadlinux-8f50d5c423551bfa259af792647a2f4799780ac5.tar.xz
Merge branch 'Allow struct_ops maps with a large number of programs'
Kui-Feng Lee says: ==================== The BPF struct_ops previously only allowed for one page to be used for the trampolines of all links in a map. However, we have recently run out of space due to the large number of BPF program links. By allocating additional pages when we exhaust an existing page, we can accommodate more links in a single map. The variable st_map->image has been changed to st_map->image_pages, and its type has been changed to an array of pointers to buffers of PAGE_SIZE. Additional pages are allocated when all existing pages are exhausted. The test case loads a struct_ops maps having 40 programs. Their trampolines takes about 6.6k+ bytes over 1.5 pages on x86. --- Major differences from v3: - Refactor buffer allocations to bpf_struct_ops_tramp_buf_alloc() and bpf_struct_ops_tramp_buf_free(). Major differences from v2: - Move image buffer allocation to bpf_struct_ops_prepare_trampoline(). Major differences from v1: - Always free pages if failing to update. - Allocate 8 pages at most. v3: https://lore.kernel.org/all/20240224030302.1500343-1-thinker.li@gmail.com/ v2: https://lore.kernel.org/all/20240221225911.757861-1-thinker.li@gmail.com/ v1: https://lore.kernel.org/all/20240216182828.201727-1-thinker.li@gmail.com/ ==================== Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-rw-r--r--include/linux/bpf.h4
-rw-r--r--kernel/bpf/bpf_struct_ops.c141
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c12
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c102
7 files changed, 279 insertions, 60 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 814dc913a968..785660810e6a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1763,7 +1763,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
void *stub_func,
- void *image, void *image_end);
+ void **image, u32 *image_off,
+ bool allow_alloc);
+void bpf_struct_ops_image_free(void *image);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
if (owner == BPF_MODULE_OWNER)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index a6019087b467..43356faaa057 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -18,6 +18,8 @@ struct bpf_struct_ops_value {
char data[] ____cacheline_aligned_in_smp;
};
+#define MAX_TRAMP_IMAGE_PAGES 8
+
struct bpf_struct_ops_map {
struct bpf_map map;
struct rcu_head rcu;
@@ -30,12 +32,11 @@ struct bpf_struct_ops_map {
*/
struct bpf_link **links;
u32 links_cnt;
- /* image is a page that has all the trampolines
+ u32 image_pages_cnt;
+ /* image_pages is an array of pages that has all the trampolines
* that stores the func args before calling the bpf_prog.
- * A PAGE_SIZE "image" is enough to store all trampoline for
- * "links[]".
*/
- void *image;
+ void *image_pages[MAX_TRAMP_IMAGE_PAGES];
/* The owner moduler's btf. */
struct btf *btf;
/* uvalue->data stores the kernel struct
@@ -116,6 +117,31 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id,
return true;
}
+static void *bpf_struct_ops_image_alloc(void)
+{
+ void *image;
+ int err;
+
+ err = bpf_jit_charge_modmem(PAGE_SIZE);
+ if (err)
+ return ERR_PTR(err);
+ image = arch_alloc_bpf_trampoline(PAGE_SIZE);
+ if (!image) {
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return image;
+}
+
+void bpf_struct_ops_image_free(void *image)
+{
+ if (image) {
+ arch_free_bpf_trampoline(image, PAGE_SIZE);
+ bpf_jit_uncharge_modmem(PAGE_SIZE);
+ }
+}
+
#define MAYBE_NULL_SUFFIX "__nullable"
#define MAX_STUB_NAME 128
@@ -461,6 +487,15 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
}
}
+static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map)
+{
+ int i;
+
+ for (i = 0; i < st_map->image_pages_cnt; i++)
+ bpf_struct_ops_image_free(st_map->image_pages[i]);
+ st_map->image_pages_cnt = 0;
+}
+
static int check_zero_holes(const struct btf *btf, const struct btf_type *t, void *data)
{
const struct btf_member *member;
@@ -506,9 +541,12 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
- void *stub_func, void *image, void *image_end)
+ void *stub_func,
+ void **_image, u32 *_image_off,
+ bool allow_alloc)
{
- u32 flags = BPF_TRAMP_F_INDIRECT;
+ u32 image_off = *_image_off, flags = BPF_TRAMP_F_INDIRECT;
+ void *image = *_image;
int size;
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
@@ -518,12 +556,32 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
- if (size < 0)
- return size;
- if (size > (unsigned long)image_end - (unsigned long)image)
- return -E2BIG;
- return arch_prepare_bpf_trampoline(NULL, image, image_end,
+ if (size <= 0)
+ return size ? : -EFAULT;
+
+ /* Allocate image buffer if necessary */
+ if (!image || size > PAGE_SIZE - image_off) {
+ if (!allow_alloc)
+ return -E2BIG;
+
+ image = bpf_struct_ops_image_alloc();
+ if (IS_ERR(image))
+ return PTR_ERR(image);
+ image_off = 0;
+ }
+
+ size = arch_prepare_bpf_trampoline(NULL, image + image_off,
+ image + PAGE_SIZE,
model, flags, tlinks, stub_func);
+ if (size <= 0) {
+ if (image != *_image)
+ bpf_struct_ops_image_free(image);
+ return size ? : -EFAULT;
+ }
+
+ *_image = image;
+ *_image_off = image_off + size;
+ return 0;
}
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
@@ -539,8 +597,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
struct bpf_tramp_links *tlinks;
void *udata, *kdata;
int prog_fd, err;
- void *image, *image_end;
- u32 i;
+ u32 i, trampoline_start, image_off = 0;
+ void *cur_image = NULL, *image = NULL;
if (flags)
return -EINVAL;
@@ -578,8 +636,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
udata = &uvalue->data;
kdata = &kvalue->data;
- image = st_map->image;
- image_end = st_map->image + PAGE_SIZE;
module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
for_each_member(i, t, member) {
@@ -658,28 +714,39 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
&bpf_struct_ops_link_lops, prog);
st_map->links[i] = &link->link;
+ trampoline_start = image_off;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
- &st_ops->func_models[i],
- *(void **)(st_ops->cfi_stubs + moff),
- image, image_end);
+ &st_ops->func_models[i],
+ *(void **)(st_ops->cfi_stubs + moff),
+ &image, &image_off,
+ st_map->image_pages_cnt < MAX_TRAMP_IMAGE_PAGES);
+ if (err)
+ goto reset_unlock;
+
+ if (cur_image != image) {
+ st_map->image_pages[st_map->image_pages_cnt++] = image;
+ cur_image = image;
+ trampoline_start = 0;
+ }
if (err < 0)
goto reset_unlock;
- *(void **)(kdata + moff) = image + cfi_get_offset();
- image += err;
+ *(void **)(kdata + moff) = image + trampoline_start + cfi_get_offset();
/* put prog_id to udata */
*(unsigned long *)(udata + moff) = prog->aux->id;
}
+ if (st_ops->validate) {
+ err = st_ops->validate(kdata);
+ if (err)
+ goto reset_unlock;
+ }
+ for (i = 0; i < st_map->image_pages_cnt; i++)
+ arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
+
if (st_map->map.map_flags & BPF_F_LINK) {
err = 0;
- if (st_ops->validate) {
- err = st_ops->validate(kdata);
- if (err)
- goto reset_unlock;
- }
- arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
/* Let bpf_link handle registration & unregistration.
*
* Pair with smp_load_acquire() during lookup_elem().
@@ -688,7 +755,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock;
}
- arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
err = st_ops->reg(kdata);
if (likely(!err)) {
/* This refcnt increment on the map here after
@@ -711,9 +777,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
* there was a race in registering the struct_ops (under the same name) to
* a sub-system through different struct_ops's maps.
*/
- arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
reset_unlock:
+ bpf_struct_ops_map_free_image(st_map);
bpf_struct_ops_map_put_progs(st_map);
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
@@ -780,10 +846,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
if (st_map->links)
bpf_struct_ops_map_put_progs(st_map);
bpf_map_area_free(st_map->links);
- if (st_map->image) {
- arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
- bpf_jit_uncharge_modmem(PAGE_SIZE);
- }
+ bpf_struct_ops_map_free_image(st_map);
bpf_map_area_free(st_map->uvalue);
bpf_map_area_free(st_map);
}
@@ -893,20 +956,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->st_ops_desc = st_ops_desc;
map = &st_map->map;
- ret = bpf_jit_charge_modmem(PAGE_SIZE);
- if (ret)
- goto errout_free;
-
- st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
- if (!st_map->image) {
- /* __bpf_struct_ops_map_free() uses st_map->image as flag
- * for "charged or not". In this case, we need to unchange
- * here.
- */
- bpf_jit_uncharge_modmem(PAGE_SIZE);
- ret = -ENOMEM;
- goto errout_free;
- }
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
st_map->links_cnt = btf_type_vlen(t);
st_map->links =
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 02de71719aed..1b5f812e6972 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -91,6 +91,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
+ u32 image_off = 0;
int prog_ret;
s32 type_id;
int err;
@@ -114,12 +115,6 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
}
- image = arch_alloc_bpf_trampoline(PAGE_SIZE);
- if (!image) {
- err = -ENOMEM;
- goto out;
- }
-
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
@@ -133,7 +128,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
&dummy_ops_test_ret_function,
- image, image + PAGE_SIZE);
+ &image, &image_off,
+ true);
if (err < 0)
goto out;
@@ -147,7 +143,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = -EFAULT;
out:
kfree(args);
- arch_free_bpf_trampoline(image, PAGE_SIZE);
+ bpf_struct_ops_image_free(image);
if (link)
bpf_link_put(&link->link);
kfree(tlinks);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 1b34050a7538..28ffcfbeef14 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -146,11 +146,7 @@ EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
{
struct tcp_congestion_ops *existing;
- int ret;
-
- ret = tcp_validate_congestion_control(ca);
- if (ret)
- return ret;
+ int ret = 0;
ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index 971458acfac3..622d24e29d35 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -43,6 +43,50 @@ struct bpf_testmod_ops {
int b;
} unsupported;
int data;
+
+ /* The following pointers are used to test the maps having multiple
+ * pages of trampolines.
+ */
+ int (*tramp_1)(int value);
+ int (*tramp_2)(int value);
+ int (*tramp_3)(int value);
+ int (*tramp_4)(int value);
+ int (*tramp_5)(int value);
+ int (*tramp_6)(int value);
+ int (*tramp_7)(int value);
+ int (*tramp_8)(int value);
+ int (*tramp_9)(int value);
+ int (*tramp_10)(int value);
+ int (*tramp_11)(int value);
+ int (*tramp_12)(int value);
+ int (*tramp_13)(int value);
+ int (*tramp_14)(int value);
+ int (*tramp_15)(int value);
+ int (*tramp_16)(int value);
+ int (*tramp_17)(int value);
+ int (*tramp_18)(int value);
+ int (*tramp_19)(int value);
+ int (*tramp_20)(int value);
+ int (*tramp_21)(int value);
+ int (*tramp_22)(int value);
+ int (*tramp_23)(int value);
+ int (*tramp_24)(int value);
+ int (*tramp_25)(int value);
+ int (*tramp_26)(int value);
+ int (*tramp_27)(int value);
+ int (*tramp_28)(int value);
+ int (*tramp_29)(int value);
+ int (*tramp_30)(int value);
+ int (*tramp_31)(int value);
+ int (*tramp_32)(int value);
+ int (*tramp_33)(int value);
+ int (*tramp_34)(int value);
+ int (*tramp_35)(int value);
+ int (*tramp_36)(int value);
+ int (*tramp_37)(int value);
+ int (*tramp_38)(int value);
+ int (*tramp_39)(int value);
+ int (*tramp_40)(int value);
};
#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
new file mode 100644
index 000000000000..645d32b5160c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_multi_pages.skel.h"
+
+static void do_struct_ops_multi_pages(void)
+{
+ struct struct_ops_multi_pages *skel;
+ struct bpf_link *link;
+
+ /* The size of all trampolines of skel->maps.multi_pages should be
+ * over 1 page (at least for x86).
+ */
+ skel = struct_ops_multi_pages__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_multi_pages_open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.multi_pages);
+ ASSERT_OK_PTR(link, "attach_multi_pages");
+
+ bpf_link__destroy(link);
+ struct_ops_multi_pages__destroy(skel);
+}
+
+void test_struct_ops_multi_pages(void)
+{
+ if (test__start_subtest("multi_pages"))
+ do_struct_ops_multi_pages();
+}
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
new file mode 100644
index 000000000000..9efcc6e4d356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TRAMP(x) \
+ SEC("struct_ops/tramp_" #x) \
+ int BPF_PROG(tramp_ ## x, int a) \
+ { \
+ return a; \
+ }
+
+TRAMP(1)
+TRAMP(2)
+TRAMP(3)
+TRAMP(4)
+TRAMP(5)
+TRAMP(6)
+TRAMP(7)
+TRAMP(8)
+TRAMP(9)
+TRAMP(10)
+TRAMP(11)
+TRAMP(12)
+TRAMP(13)
+TRAMP(14)
+TRAMP(15)
+TRAMP(16)
+TRAMP(17)
+TRAMP(18)
+TRAMP(19)
+TRAMP(20)
+TRAMP(21)
+TRAMP(22)
+TRAMP(23)
+TRAMP(24)
+TRAMP(25)
+TRAMP(26)
+TRAMP(27)
+TRAMP(28)
+TRAMP(29)
+TRAMP(30)
+TRAMP(31)
+TRAMP(32)
+TRAMP(33)
+TRAMP(34)
+TRAMP(35)
+TRAMP(36)
+TRAMP(37)
+TRAMP(38)
+TRAMP(39)
+TRAMP(40)
+
+#define F_TRAMP(x) .tramp_ ## x = (void *)tramp_ ## x
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops multi_pages = {
+ F_TRAMP(1),
+ F_TRAMP(2),
+ F_TRAMP(3),
+ F_TRAMP(4),
+ F_TRAMP(5),
+ F_TRAMP(6),
+ F_TRAMP(7),
+ F_TRAMP(8),
+ F_TRAMP(9),
+ F_TRAMP(10),
+ F_TRAMP(11),
+ F_TRAMP(12),
+ F_TRAMP(13),
+ F_TRAMP(14),
+ F_TRAMP(15),
+ F_TRAMP(16),
+ F_TRAMP(17),
+ F_TRAMP(18),
+ F_TRAMP(19),
+ F_TRAMP(20),
+ F_TRAMP(21),
+ F_TRAMP(22),
+ F_TRAMP(23),
+ F_TRAMP(24),
+ F_TRAMP(25),
+ F_TRAMP(26),
+ F_TRAMP(27),
+ F_TRAMP(28),
+ F_TRAMP(29),
+ F_TRAMP(30),
+ F_TRAMP(31),
+ F_TRAMP(32),
+ F_TRAMP(33),
+ F_TRAMP(34),
+ F_TRAMP(35),
+ F_TRAMP(36),
+ F_TRAMP(37),
+ F_TRAMP(38),
+ F_TRAMP(39),
+ F_TRAMP(40),
+};