diff options
Diffstat (limited to 'tools/lib')
-rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 1 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_tracing.h | 2 | ||||
-rw-r--r-- | tools/lib/bpf/btf.c | 160 | ||||
-rw-r--r-- | tools/lib/bpf/elf.c | 139 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 198 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.h | 73 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 7 | ||||
-rw-r--r-- | tools/lib/bpf/ringbuf.c | 85 |
8 files changed, 608 insertions, 57 deletions
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index bbab9ad9dc5a..77ceea575dc7 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -181,6 +181,7 @@ enum libbpf_tristate { #define __ksym __attribute__((section(".ksyms"))) #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) +#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) #define bpf_ksym_exists(sym) ({ \ _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 3803479dbe10..1c13f8e88833 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -362,8 +362,6 @@ struct pt_regs___arm64 { #define __PT_PARM7_REG a6 #define __PT_PARM8_REG a7 -/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ -#define PT_REGS_SYSCALL_REGS(ctx) ctx #define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 8484b563b53d..ee95fd379d4d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -448,6 +448,165 @@ static int btf_parse_type_sec(struct btf *btf) return 0; } +static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id) +{ + const char *s; + + s = btf__str_by_offset(btf, str_off); + if (!s) { + pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id) +{ + const struct btf_type *t; + + t = btf__type_by_id(btf, id); + if (!t) { + pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id) +{ + __u32 kind = btf_kind(t); + int err, i, n; + + err = btf_validate_str(btf, t->name_off, "type name", id); + if (err) + return err; + + switch (kind) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + break; + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + + err = btf_validate_id(btf, a->type, id); + err = err ?: btf_validate_id(btf, a->index_type, id); + if (err) + return err; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "field name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *m = btf_enum(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *m = btf_enum64(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_FUNC: { + const struct btf_type *ft; + + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + ft = btf__type_by_id(btf, t->type); + if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) { + pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type); + return -EINVAL; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *m = btf_params(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "param name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *m = btf_var_secinfos(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + default: + pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); + return -EINVAL; + } + return 0; +} + +/* Validate basic sanity of BTF. It's intentionally less thorough than + * kernel's validation and validates only properties of BTF that libbpf relies + * on to be correct (e.g., valid type IDs, valid string offsets, etc) + */ +static int btf_sanity_check(const struct btf *btf) +{ + const struct btf_type *t; + __u32 i, n = btf__type_cnt(btf); + int err; + + for (i = 1; i < n; i++) { + t = btf_type_by_id(btf, i); + err = btf_validate_type(btf, t, i); + if (err) + return err; + } + return 0; +} + __u32 btf__type_cnt(const struct btf *btf) { return btf->start_id + btf->nr_types; @@ -902,6 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) err = btf_parse_str_sec(btf); err = err ?: btf_parse_type_sec(btf); + err = err ?: btf_sanity_check(btf); if (err) goto done; diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 9d0296c1726a..2a158e8a8b7c 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <libelf.h> #include <gelf.h> #include <fcntl.h> @@ -10,6 +13,17 @@ #define STRERR_BUFSIZE 128 +/* A SHT_GNU_versym section holds 16-bit words. This bit is set if + * the symbol is hidden and can only be seen when referenced using an + * explicit version number. This is a GNU extension. + */ +#define VERSYM_HIDDEN 0x8000 + +/* This is the mask for the rest of the data in a word read from a + * SHT_GNU_versym section. + */ +#define VERSYM_VERSION 0x7fff + int elf_open(const char *binary_path, struct elf_fd *elf_fd) { char errmsg[STRERR_BUFSIZE]; @@ -64,13 +78,18 @@ struct elf_sym { const char *name; GElf_Sym sym; GElf_Shdr sh; + int ver; + bool hidden; }; struct elf_sym_iter { Elf *elf; Elf_Data *syms; + Elf_Data *versyms; + Elf_Data *verdefs; size_t nr_syms; size_t strtabidx; + size_t verdef_strtabidx; size_t next_sym_idx; struct elf_sym sym; int st_type; @@ -111,6 +130,26 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter, iter->nr_syms = iter->syms->d_size / sh.sh_entsize; iter->elf = elf; iter->st_type = st_type; + + /* Version symbol table is meaningful to dynsym only */ + if (sh_type != SHT_DYNSYM) + return 0; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL); + if (!scn) + return 0; + iter->versyms = elf_getdata(scn, 0); + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); + if (!scn) { + pr_debug("elf: failed to find verdef ELF sections in '%s'\n", binary_path); + return -ENOENT; + } + if (!gelf_getshdr(scn, &sh)) + return -EINVAL; + iter->verdef_strtabidx = sh.sh_link; + iter->verdefs = elf_getdata(scn, 0); + return 0; } @@ -119,6 +158,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) struct elf_sym *ret = &iter->sym; GElf_Sym *sym = &ret->sym; const char *name = NULL; + GElf_Versym versym; Elf_Scn *sym_scn; size_t idx; @@ -138,12 +178,80 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) iter->next_sym_idx = idx + 1; ret->name = name; + ret->ver = 0; + ret->hidden = false; + + if (iter->versyms) { + if (!gelf_getversym(iter->versyms, idx, &versym)) + continue; + ret->ver = versym & VERSYM_VERSION; + ret->hidden = versym & VERSYM_HIDDEN; + } return ret; } return NULL; } +static const char *elf_get_vername(struct elf_sym_iter *iter, int ver) +{ + GElf_Verdaux verdaux; + GElf_Verdef verdef; + int offset; + + offset = 0; + while (gelf_getverdef(iter->verdefs, offset, &verdef)) { + if (verdef.vd_ndx != ver) { + if (!verdef.vd_next) + break; + + offset += verdef.vd_next; + continue; + } + + if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux)) + break; + + return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name); + + } + return NULL; +} + +static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym, + const char *name, size_t name_len, const char *lib_ver) +{ + const char *ver_name; + + /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER + * make sure the func part matches the user specified name + */ + if (strncmp(sym->name, name, name_len) != 0) + return false; + + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (sym->name[name_len] != '\0' && sym->name[name_len] != '@') + return false; + + /* If user does not specify symbol version, then we got a match */ + if (!lib_ver) + return true; + + /* If user specifies symbol version, for dynamic symbols, + * get version name from ELF verdef section for comparison. + */ + if (sh_type == SHT_DYNSYM) { + ver_name = elf_get_vername(iter, sym->ver); + if (!ver_name) + return false; + return strcmp(ver_name, lib_ver) == 0; + } + + /* For normal symbols, it is already in form of func@LIB_VER */ + return strcmp(sym->name, name) == 0; +} /* Transform symbol's virtual address (absolute for binaries and relative * for shared libs) into file offset, which is what kernel is expecting @@ -166,7 +274,8 @@ static unsigned long elf_sym_offset(struct elf_sym *sym) long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) { int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; - bool is_shared_lib, is_name_qualified; + const char *at_symbol, *lib_ver; + bool is_shared_lib; long ret = -ENOENT; size_t name_len; GElf_Ehdr ehdr; @@ -179,9 +288,18 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) /* for shared lib case, we do not need to calculate relative offset */ is_shared_lib = ehdr.e_type == ET_DYN; - name_len = strlen(name); - /* Does name specify "@@LIB"? */ - is_name_qualified = strstr(name, "@@") != NULL; + /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */ + at_symbol = strchr(name, '@'); + if (at_symbol) { + name_len = at_symbol - name; + /* skip second @ if it's @@LIB_VER case */ + if (at_symbol[1] == '@') + at_symbol++; + lib_ver = at_symbol + 1; + } else { + name_len = strlen(name); + lib_ver = NULL; + } /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically @@ -201,20 +319,17 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) goto out; while ((sym = elf_sym_iter_next(&iter))) { - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ - if (strncmp(sym->name, name, name_len) != 0) - continue; - /* ...but we don't want a search for "foo" to match 'foo2" also, so any - * additional characters in sname should be of the form "@@LIB". - */ - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') + if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver)) continue; cur_bind = GELF_ST_BIND(sym->sym.st_info); if (ret > 0) { /* handle multiple matches */ - if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { + if (elf_sym_offset(sym) == ret) { + /* same offset, no problem */ + continue; + } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { /* Only accept one non-weak bind. */ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", sym->name, name, binary_path); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 96ff1aa4bf6a..a295f6acf98f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -82,17 +82,22 @@ static const char * const attach_type_name[] = { [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", @@ -436,9 +441,11 @@ struct bpf_program { int fd; bool autoload; bool autoattach; + bool sym_global; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; + int exception_cb_idx; int prog_ifindex; __u32 attach_btf_obj_fd; @@ -765,6 +772,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->type = BPF_PROG_TYPE_UNSPEC; prog->fd = -1; + prog->exception_cb_idx = -1; /* libbpf's convention for SEC("?abc...") is that it's just like * SEC("abc...") but the corresponding bpf_program starts out with @@ -871,14 +879,16 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) + prog->sym_global = true; + /* if function is a global/weak symbol, but has restricted * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL - && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN - || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) + if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -3142,6 +3152,86 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } } + if (!kernel_supports(obj, FEAT_BTF_DECL_TAG)) + goto skip_exception_cb; + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *prog = &obj->programs[i]; + int j, k, n; + + if (prog_is_subprog(obj, prog)) + continue; + n = btf__type_cnt(obj->btf); + for (j = 1; j < n; j++) { + const char *str = "exception_callback:", *name; + size_t len = strlen(str); + struct btf_type *t; + + t = btf_type_by_id(obj->btf, j); + if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strncmp(name, str, len)) + continue; + + t = btf_type_by_id(obj->btf, t->type); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", + prog->name); + return -EINVAL; + } + if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off))) + continue; + /* Multiple callbacks are specified for the same prog, + * the verifier will eventually return an error for this + * case, hence simply skip appending a subprog. + */ + if (prog->exception_cb_idx >= 0) { + prog->exception_cb_idx = -1; + break; + } + + name += len; + if (str_is_empty(name)) { + pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", + prog->name); + return -EINVAL; + } + + for (k = 0; k < obj->nr_programs; k++) { + struct bpf_program *subprog = &obj->programs[k]; + + if (!prog_is_subprog(obj, subprog)) + continue; + if (strcmp(name, subprog->name)) + continue; + /* Enforce non-hidden, as from verifier point of + * view it expects global functions, whereas the + * mark_btf_static fixes up linkage as static. + */ + if (!subprog->sym_global || subprog->mark_btf_static) { + pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", + prog->name, subprog->name); + return -EINVAL; + } + /* Let's see if we already saw a static exception callback with the same name */ + if (prog->exception_cb_idx >= 0) { + pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", + prog->name, subprog->name); + return -EINVAL; + } + prog->exception_cb_idx = k; + break; + } + + if (prog->exception_cb_idx >= 0) + continue; + pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); + return -ENOENT; + } + } +skip_exception_cb: + sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -6235,13 +6325,45 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra } static int +bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, + struct bpf_program *subprog) +{ + struct bpf_insn *insns; + size_t new_cnt; + int err; + + subprog->sub_insn_off = main_prog->insns_cnt; + + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; + + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); + + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; + return 0; +} + +static int bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *prog) { - size_t sub_insn_idx, insn_idx, new_cnt; + size_t sub_insn_idx, insn_idx; struct bpf_program *subprog; - struct bpf_insn *insns, *insn; struct reloc_desc *relo; + struct bpf_insn *insn; int err; err = reloc_prog_func_and_line_info(obj, main_prog, prog); @@ -6316,25 +6438,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * and relocate. */ if (subprog->sub_insn_off == 0) { - subprog->sub_insn_off = main_prog->insns_cnt; - - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; - - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); - - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); - - /* The subprog insns are now appended. Append its relos too. */ - err = append_subprog_relos(main_prog, subprog); + err = bpf_object__append_subprog_code(obj, main_prog, subprog); if (err) return err; err = bpf_object__reloc_code(obj, main_prog, subprog); @@ -6568,6 +6672,25 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog->name, err); return err; } + + /* Now, also append exception callback if it has not been done already. */ + if (prog->exception_cb_idx >= 0) { + struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; + + /* Calling exception callback directly is disallowed, which the + * verifier will reject later. In case it was processed already, + * we can skip this step, otherwise for all other valid cases we + * have to append exception callback now. + */ + if (subprog->sub_insn_off == 0) { + err = bpf_object__append_subprog_code(obj, prog, subprog); + if (err) + return err; + err = bpf_object__reloc_code(obj, prog, subprog); + if (err) + return err; + } + } } /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { @@ -8842,14 +8965,19 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), @@ -10996,7 +11124,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", &probe_type, &binary_path, &func_name); switch (n) { case 1: @@ -11506,14 +11634,14 @@ err_out: static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); - char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; - int n, ret = -EINVAL; + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; + int n, c, ret = -EINVAL; long offset = 0; *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", - &probe_type, &binary_path, &func_name, &offset); + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); switch (n) { case 1: /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ @@ -11524,7 +11652,17 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf prog->name, prog->sec_name); break; case 3: - case 4: + /* check if user specifies `+offset`, if yes, this should be + * the last part of the string, make sure sscanf read to EOL + */ + func_off = strrchr(func_name, '+'); + if (func_off) { + n = sscanf(func_off, "+%li%n", &offset, &c); + if (n == 1 && *(func_off + c) == '\0') + func_off[0] = '\0'; + else + offset = 0; + } opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || strcmp(probe_type, "uretprobe.s") == 0; if (opts.retprobe && offset != 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0e52621cba43..475378438545 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1229,6 +1229,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, /* Ring buffer APIs */ struct ring_buffer; +struct ring; struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); @@ -1249,6 +1250,78 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); +/** + * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given + * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance. + * + * @param rb A ringbuffer manager object. + * @param idx An index into the ringbuffers contained within the ringbuffer + * manager object. The index is 0-based and corresponds to the order in which + * ring_buffer__add was called. + * @return A ringbuffer object on success; NULL and errno set if the index is + * invalid. + */ +LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb, + unsigned int idx); + +/** + * @brief **ring__consumer_pos()** returns the current consumer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current consumer position. + */ +LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r); + +/** + * @brief **ring__producer_pos()** returns the current producer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current producer position. + */ +LIBBPF_API unsigned long ring__producer_pos(const struct ring *r); + +/** + * @brief **ring__avail_data_size()** returns the number of bytes in the + * ringbuffer not yet consumed. This has no locking associated with it, so it + * can be inaccurate if operations are ongoing while this is called. However, it + * should still show the correct trend over the long-term. + * + * @param r A ringbuffer object. + * @return The number of bytes not yet consumed. + */ +LIBBPF_API size_t ring__avail_data_size(const struct ring *r); + +/** + * @brief **ring__size()** returns the total size of the ringbuffer's map data + * area (excluding special producer/consumer pages). Effectively this gives the + * amount of usable bytes of data inside the ringbuffer. + * + * @param r A ringbuffer object. + * @return The total size of the ringbuffer map data area. + */ +LIBBPF_API size_t ring__size(const struct ring *r); + +/** + * @brief **ring__map_fd()** returns the file descriptor underlying the given + * ringbuffer. + * + * @param r A ringbuffer object. + * @return The underlying ringbuffer file descriptor + */ +LIBBPF_API int ring__map_fd(const struct ring *r); + +/** + * @brief **ring__consume()** consumes available ringbuffer data without event + * polling. + * + * @param r A ringbuffer object. + * @return The number of records consumed (or INT_MAX, whichever is less), or + * a negative number if any of the callbacks return an error. + */ +LIBBPF_API int ring__consume(struct ring *r); + struct user_ring_buffer_opts { size_t sz; /* size of this struct, for forward/backward compatibility */ }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 57712321490f..cc973b678a39 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -400,4 +400,11 @@ LIBBPF_1.3.0 { bpf_program__attach_netfilter; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + ring__avail_data_size; + ring__consume; + ring__consumer_pos; + ring__map_fd; + ring__producer_pos; + ring__size; + ring_buffer__ring; } LIBBPF_1.2.0; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 02199364db13..aacb64278a01 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -34,7 +34,7 @@ struct ring { struct ring_buffer { struct epoll_event *events; - struct ring *rings; + struct ring **rings; size_t page_size; int epoll_fd; int ring_cnt; @@ -57,7 +57,7 @@ struct ringbuf_hdr { __u32 pad; }; -static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) +static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r) { if (r->consumer_pos) { munmap(r->consumer_pos, rb->page_size); @@ -67,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); r->producer_pos = NULL; } + + free(r); } /* Add extra RINGBUF maps to this ring buffer manager */ @@ -107,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return libbpf_err(-ENOMEM); rb->events = tmp; - r = &rb->rings[rb->ring_cnt]; - memset(r, 0, sizeof(*r)); + r = calloc(1, sizeof(*r)); + if (!r) + return libbpf_err(-ENOMEM); + rb->rings[rb->ring_cnt] = r; r->map_fd = map_fd; r->sample_cb = sample_cb; @@ -121,7 +125,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->consumer_pos = tmp; @@ -131,16 +135,16 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, */ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; if (mmap_sz != (__u64)(size_t)mmap_sz) { + err = -E2BIG; pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); - return libbpf_err(-E2BIG); + goto err_out; } tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->producer_pos = tmp; r->data = tmp + rb->page_size; @@ -152,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } rb->ring_cnt++; return 0; + +err_out: + ringbuf_free_ring(rb, r); + return libbpf_err(err); } void ring_buffer__free(struct ring_buffer *rb) @@ -170,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb) return; for (i = 0; i < rb->ring_cnt; ++i) - ringbuf_unmap_ring(rb, &rb->rings[i]); + ringbuf_free_ring(rb, rb->rings[i]); if (rb->epoll_fd >= 0) close(rb->epoll_fd); @@ -278,7 +285,7 @@ int ring_buffer__consume(struct ring_buffer *rb) int i; for (i = 0; i < rb->ring_cnt; i++) { - struct ring *ring = &rb->rings[i]; + struct ring *ring = rb->rings[i]; err = ringbuf_process_ring(ring); if (err < 0) @@ -305,7 +312,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) for (i = 0; i < cnt; i++) { __u32 ring_id = rb->events[i].data.fd; - struct ring *ring = &rb->rings[ring_id]; + struct ring *ring = rb->rings[ring_id]; err = ringbuf_process_ring(ring); if (err < 0) @@ -323,6 +330,58 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb) return rb->epoll_fd; } +struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx) +{ + if (idx >= rb->ring_cnt) + return errno = ERANGE, NULL; + + return rb->rings[idx]; +} + +unsigned long ring__consumer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */ + return smp_load_acquire(r->consumer_pos); +} + +unsigned long ring__producer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in + * the kernel. + */ + return smp_load_acquire(r->producer_pos); +} + +size_t ring__avail_data_size(const struct ring *r) +{ + unsigned long cons_pos, prod_pos; + + cons_pos = ring__consumer_pos(r); + prod_pos = ring__producer_pos(r); + return prod_pos - cons_pos; +} + +size_t ring__size(const struct ring *r) +{ + return r->mask + 1; +} + +int ring__map_fd(const struct ring *r) +{ + return r->map_fd; +} + +int ring__consume(struct ring *r) +{ + int64_t res; + + res = ringbuf_process_ring(r); + if (res < 0) + return libbpf_err(res); + + return res > INT_MAX ? INT_MAX : res; +} + static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) { if (rb->consumer_pos) { |