From 51161bfc66a68d21f13d15a689b3ea7980457790 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 19 Apr 2020 18:55:06 +0300 Subject: kernel/module: Hide vermagic header file from general use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VERMAGIC* definitions are not supposed to be used by the drivers, see this [1] bug report, so introduce special define to guard inclusion of this header file and define it in kernel/modules.h and in internal script that generates *.mod.c files. In-tree module build: ➜ kernel git:(vermagic) ✗ make clean ➜ kernel git:(vermagic) ✗ make M=drivers/infiniband/hw/mlx5 ➜ kernel git:(vermagic) ✗ modinfo drivers/infiniband/hw/mlx5/mlx5_ib.ko filename: /images/leonro/src/kernel/drivers/infiniband/hw/mlx5/mlx5_ib.ko <...> vermagic: 5.6.0+ SMP mod_unload modversions Out-of-tree module build: ➜ mlx5 make -C /images/leonro/src/kernel clean M=/tmp/mlx5 ➜ mlx5 make -C /images/leonro/src/kernel M=/tmp/mlx5 ➜ mlx5 modinfo /tmp/mlx5/mlx5_ib.ko filename: /tmp/mlx5/mlx5_ib.ko <...> vermagic: 5.6.0+ SMP mod_unload modversions [1] https://lore.kernel.org/lkml/20200411155623.GA22175@zn.tnic Reported-by: Borislav Petkov Acked-by: Borislav Petkov Acked-by: Jessica Yu Co-developed-by: Masahiro Yamada Signed-off-by: Masahiro Yamada Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- scripts/mod/modpost.c | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 5c3c50c5ec52..7f7d4ee7b652 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2251,6 +2251,7 @@ static void add_header(struct buffer *b, struct module *mod) * Include build-salt.h after module.h in order to * inherit the definitions. */ + buf_printf(b, "#define INCLUDE_VERMAGIC\n"); buf_printf(b, "#include \n"); buf_printf(b, "#include \n"); buf_printf(b, "#include \n"); -- cgit v1.2.3 From c75a33c84b83ffbb8b8b58a6bf4dea69dba21326 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 6 May 2020 17:58:27 -0700 Subject: net: remove newlines in NL_SET_ERR_MSG_MOD The NL_SET_ERR_MSG_MOD macro is used to report a string describing an error message to userspace via the netlink extended ACK structure. It should not have a trailing newline. Add a cocci script which catches cases where the newline marker is present. Using this script, fix the handful of cases which accidentally included a trailing new line. I couldn't figure out a way to get a patch mode working, so this script only implements context, report, and org. Signed-off-by: Jacob Keller Cc: Jakub Kicinski Cc: Andy Whitcroft Cc: Joe Perches Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/ethernet/mscc/ocelot_tc.c | 6 +- net/bridge/br_mrp_netlink.c | 2 +- net/bridge/br_stp_if.c | 2 +- net/dsa/slave.c | 6 +- scripts/coccinelle/misc/newline_in_nl_msg.cocci | 75 +++++++++++++++++++++++++ 6 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 scripts/coccinelle/misc/newline_in_nl_msg.cocci (limited to 'scripts') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 77397aa66810..a050808f2128 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1097,7 +1097,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (IS_ERR(priv->fs.tc.t)) { mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, - "Failed to create tc offload table\n"); + "Failed to create tc offload table"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); return PTR_ERR(priv->fs.tc.t); diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c index d326e231f0ad..b7baf7624e18 100644 --- a/drivers/net/ethernet/mscc/ocelot_tc.c +++ b/drivers/net/ethernet/mscc/ocelot_tc.c @@ -48,7 +48,7 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, if (priv->tc.police_id && priv->tc.police_id != f->cookie) { NL_SET_ERR_MSG_MOD(extack, - "Only one policer per port is supported\n"); + "Only one policer per port is supported"); return -EEXIST; } @@ -59,7 +59,7 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, err = ocelot_port_policer_add(ocelot, port, &pol); if (err) { - NL_SET_ERR_MSG_MOD(extack, "Could not add policer\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not add policer"); return err; } @@ -73,7 +73,7 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, err = ocelot_port_policer_del(ocelot, port); if (err) { NL_SET_ERR_MSG_MOD(extack, - "Could not delete policer\n"); + "Could not delete policer"); return err; } priv->tc.police_id = 0; diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index 503896638be0..397e7f710772 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -28,7 +28,7 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, int err; if (br->stp_enabled != BR_NO_STP) { - NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled\n"); + NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled"); return -EINVAL; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a42850b7eb9a..ba55851fe132 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -203,7 +203,7 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val, if (br_mrp_enabled(br)) { NL_SET_ERR_MSG_MOD(extack, - "STP can't be enabled if MRP is already enabled\n"); + "STP can't be enabled if MRP is already enabled"); return -EINVAL; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index ea0fcf7bf786..dfb4282fc339 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -911,13 +911,13 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev, if (!ds->ops->port_policer_add) { NL_SET_ERR_MSG_MOD(extack, - "Policing offload not implemented\n"); + "Policing offload not implemented"); return -EOPNOTSUPP; } if (!ingress) { NL_SET_ERR_MSG_MOD(extack, - "Only supported on ingress qdisc\n"); + "Only supported on ingress qdisc"); return -EOPNOTSUPP; } @@ -928,7 +928,7 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev, list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) { if (mall_tc_entry->type == DSA_PORT_MALL_POLICER) { NL_SET_ERR_MSG_MOD(extack, - "Only one port policer allowed\n"); + "Only one port policer allowed"); return -EEXIST; } } diff --git a/scripts/coccinelle/misc/newline_in_nl_msg.cocci b/scripts/coccinelle/misc/newline_in_nl_msg.cocci new file mode 100644 index 000000000000..c175886e4015 --- /dev/null +++ b/scripts/coccinelle/misc/newline_in_nl_msg.cocci @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/// +/// Catch strings ending in newline with GENL_SET_ERR_MSG, NL_SET_ERR_MSG, +/// NL_SET_ERR_MSG_MOD. +/// +// Confidence: Very High +// Copyright: (C) 2020 Intel Corporation +// URL: http://coccinelle.lip6.fr/ +// Options: --no-includes --include-headers + +virtual context +virtual org +virtual report + +@r depends on context || org || report@ +expression e; +constant m; +position p; +@@ + \(GENL_SET_ERR_MSG\|NL_SET_ERR_MSG\|NL_SET_ERR_MSG_MOD\)(e,m@p) + +@script:python@ +m << r.m; +@@ + +if not m.endswith("\\n\""): + cocci.include_match(False) + +@r1 depends on r@ +identifier fname; +expression r.e; +constant r.m; +position r.p; +@@ + fname(e,m@p) + +//---------------------------------------------------------- +// For context mode +//---------------------------------------------------------- + +@depends on context && r@ +identifier r1.fname; +expression r.e; +constant r.m; +@@ +* fname(e,m) + +//---------------------------------------------------------- +// For org mode +//---------------------------------------------------------- + +@script:python depends on org@ +fname << r1.fname; +m << r.m; +p << r.p; +@@ + +if m.endswith("\\n\""): + msg="WARNING avoid newline at end of message in %s" % (fname) + msg_safe=msg.replace("[","@(").replace("]",")") + coccilib.org.print_todo(p[0], msg_safe) + +//---------------------------------------------------------- +// For report mode +//---------------------------------------------------------- + +@script:python depends on report@ +fname << r1.fname; +m << r.m; +p << r.p; +@@ + +if m.endswith("\\n\""): + msg="WARNING avoid newline at end of message in %s" % (fname) + coccilib.report.print_report(p[0], msg) -- cgit v1.2.3 From 492e639f0c222784e2e0f121966375f641c61b15 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:14 -0700 Subject: bpf: Add bpf_seq_printf and bpf_seq_write helpers Two helpers bpf_seq_printf and bpf_seq_write, are added for writing data to the seq_file buffer. bpf_seq_printf supports common format string flag/width/type fields so at least I can get identical results for netlink and ipv6_route targets. For bpf_seq_printf and bpf_seq_write, return value -EOVERFLOW specifically indicates a write failure due to overflow, which means the object will be repeated in the next bpf invocation if object collection stays the same. Note that if the object collection is changed, depending how collection traversal is done, even if the object still in the collection, it may not be visited. For bpf_seq_printf, format %s, %p{i,I}{4,6} needs to read kernel memory. Reading kernel memory may fail in the following two cases: - invalid kernel address, or - valid kernel address but requiring a major fault If reading kernel memory failed, the %s string will be an empty string and %p{i,I}{4,6} will be all 0. Not returning error to bpf program is consistent with what bpf_trace_printk() does for now. bpf_seq_printf may return -EBUSY meaning that internal percpu buffer for memory copy of strings or other pointees is not available. Bpf program can return 1 to indicate it wants the same object to be repeated. Right now, this should not happen on no-RT kernels since migrate_disable(), which guards bpf prog call, calls preempt_disable(). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175914.2476661-1-yhs@fb.com --- include/uapi/linux/bpf.h | 39 +++++++- kernel/trace/bpf_trace.c | 214 +++++++++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 + tools/include/uapi/linux/bpf.h | 39 +++++++- 4 files changed, 292 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 708763f702e1..9d1932e23cec 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3077,6 +3077,41 @@ union bpf_attr { * See: clock_gettime(CLOCK_BOOTTIME) * Return * Current *ktime*. + * + * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * seq_printf uses seq_file seq_printf() to print out the format string. + * The *m* represents the seq_file. The *fmt* and *fmt_size* are for + * the format string itself. The *data* and *data_len* are format string + * arguments. The *data* are a u64 array and corresponding format string + * values are stored in the array. For strings and pointers where pointees + * are accessed, only the pointer values are stored in the *data* array. + * The *data_len* is the *data* size in term of bytes. + * + * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. + * Reading kernel memory may fail due to either invalid address or + * valid address but requiring a major memory fault. If reading kernel memory + * fails, the string for **%s** will be an empty string, and the ip + * address for **%p{i,I}{4,6}** will be 0. Not returning error to + * bpf program is consistent with what bpf_trace_printk() does for now. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EBUSY** Percpu memory copy buffer is busy, can try again + * by returning 1 from bpf program. + * * **-EINVAL** Invalid arguments, or invalid/unsupported formats. + * * **-E2BIG** Too many format specifiers. + * * **-EOVERFLOW** Overflow happens, the same object will be tried again. + * + * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * Description + * seq_write uses seq_file seq_write() to write the data. + * The *m* represents the seq_file. The *data* and *len* represent the + * data to write in bytes. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EOVERFLOW** Overflow happens, the same object will be tried again. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3204,7 +3239,9 @@ union bpf_attr { FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ FN(sk_assign), \ - FN(ktime_get_boot_ns), + FN(ktime_get_boot_ns), \ + FN(seq_printf), \ + FN(seq_write), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e875c95d3ced..d961428fb5b6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -457,6 +457,212 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } +#define MAX_SEQ_PRINTF_VARARGS 12 +#define MAX_SEQ_PRINTF_MAX_MEMCPY 6 +#define MAX_SEQ_PRINTF_STR_LEN 128 + +struct bpf_seq_printf_buf { + char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN]; +}; +static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf); +static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used); + +BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, + const void *, data, u32, data_len) +{ + int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0; + int i, buf_used, copy_size, num_args; + u64 params[MAX_SEQ_PRINTF_VARARGS]; + struct bpf_seq_printf_buf *bufs; + const u64 *args = data; + + buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used); + if (WARN_ON_ONCE(buf_used > 1)) { + err = -EBUSY; + goto out; + } + + bufs = this_cpu_ptr(&bpf_seq_printf_buf); + + /* + * bpf_check()->check_func_arg()->check_stack_boundary() + * guarantees that fmt points to bpf program stack, + * fmt_size bytes of it were initialized and fmt_size > 0 + */ + if (fmt[--fmt_size] != 0) + goto out; + + if (data_len & 7) + goto out; + + for (i = 0; i < fmt_size; i++) { + if (fmt[i] == '%') { + if (fmt[i + 1] == '%') + i++; + else if (!data || !data_len) + goto out; + } + } + + num_args = data_len / 8; + + /* check format string for allowed specifiers */ + for (i = 0; i < fmt_size; i++) { + /* only printable ascii for now. */ + if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) { + err = -EINVAL; + goto out; + } + + if (fmt[i] != '%') + continue; + + if (fmt[i + 1] == '%') { + i++; + continue; + } + + if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) { + err = -E2BIG; + goto out; + } + + if (fmt_cnt >= num_args) { + err = -EINVAL; + goto out; + } + + /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ + i++; + + /* skip optional "[0 +-][num]" width formating field */ + while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' || + fmt[i] == ' ') + i++; + if (fmt[i] >= '1' && fmt[i] <= '9') { + i++; + while (fmt[i] >= '0' && fmt[i] <= '9') + i++; + } + + if (fmt[i] == 's') { + /* try our best to copy */ + if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) { + err = -E2BIG; + goto out; + } + + err = strncpy_from_unsafe(bufs->buf[memcpy_cnt], + (void *) (long) args[fmt_cnt], + MAX_SEQ_PRINTF_STR_LEN); + if (err < 0) + bufs->buf[memcpy_cnt][0] = '\0'; + params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt]; + + fmt_cnt++; + memcpy_cnt++; + continue; + } + + if (fmt[i] == 'p') { + if (fmt[i + 1] == 0 || + fmt[i + 1] == 'K' || + fmt[i + 1] == 'x') { + /* just kernel pointers */ + params[fmt_cnt] = args[fmt_cnt]; + fmt_cnt++; + continue; + } + + /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */ + if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') { + err = -EINVAL; + goto out; + } + if (fmt[i + 2] != '4' && fmt[i + 2] != '6') { + err = -EINVAL; + goto out; + } + + if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) { + err = -E2BIG; + goto out; + } + + + copy_size = (fmt[i + 2] == '4') ? 4 : 16; + + err = probe_kernel_read(bufs->buf[memcpy_cnt], + (void *) (long) args[fmt_cnt], + copy_size); + if (err < 0) + memset(bufs->buf[memcpy_cnt], 0, copy_size); + params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt]; + + i += 2; + fmt_cnt++; + memcpy_cnt++; + continue; + } + + if (fmt[i] == 'l') { + i++; + if (fmt[i] == 'l') + i++; + } + + if (fmt[i] != 'i' && fmt[i] != 'd' && + fmt[i] != 'u' && fmt[i] != 'x') { + err = -EINVAL; + goto out; + } + + params[fmt_cnt] = args[fmt_cnt]; + fmt_cnt++; + } + + /* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give + * all of them to seq_printf(). + */ + seq_printf(m, fmt, params[0], params[1], params[2], params[3], + params[4], params[5], params[6], params[7], params[8], + params[9], params[10], params[11]); + + err = seq_has_overflowed(m) ? -EOVERFLOW : 0; +out: + this_cpu_dec(bpf_seq_printf_buf_used); + return err; +} + +static int bpf_seq_printf_btf_ids[5]; +static const struct bpf_func_proto bpf_seq_printf_proto = { + .func = bpf_seq_printf, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM_OR_NULL, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_seq_printf_btf_ids, +}; + +BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len) +{ + return seq_write(m, data, len) ? -EOVERFLOW : 0; +} + +static int bpf_seq_write_btf_ids[5]; +static const struct bpf_func_proto bpf_seq_write_proto = { + .func = bpf_seq_write, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_seq_write_btf_ids, +}; + static __always_inline int get_map_perf_counter(struct bpf_map *map, u64 flags, u64 *value, u64 *enabled, u64 *running) @@ -1226,6 +1432,14 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_xdp_output: return &bpf_xdp_output_proto; #endif + case BPF_FUNC_seq_printf: + return prog->expected_attach_type == BPF_TRACE_ITER ? + &bpf_seq_printf_proto : + NULL; + case BPF_FUNC_seq_write: + return prog->expected_attach_type == BPF_TRACE_ITER ? + &bpf_seq_write_proto : + NULL; default: return raw_tp_prog_func_proto(func_id, prog); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index f43d193aff3a..ded304c96a05 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -414,6 +414,7 @@ class PrinterHelpers(Printer): 'struct sk_reuseport_md', 'struct sockaddr', 'struct tcphdr', + 'struct seq_file', 'struct __sk_buff', 'struct sk_msg_md', @@ -450,6 +451,7 @@ class PrinterHelpers(Printer): 'struct sk_reuseport_md', 'struct sockaddr', 'struct tcphdr', + 'struct seq_file', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 708763f702e1..9d1932e23cec 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3077,6 +3077,41 @@ union bpf_attr { * See: clock_gettime(CLOCK_BOOTTIME) * Return * Current *ktime*. + * + * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * seq_printf uses seq_file seq_printf() to print out the format string. + * The *m* represents the seq_file. The *fmt* and *fmt_size* are for + * the format string itself. The *data* and *data_len* are format string + * arguments. The *data* are a u64 array and corresponding format string + * values are stored in the array. For strings and pointers where pointees + * are accessed, only the pointer values are stored in the *data* array. + * The *data_len* is the *data* size in term of bytes. + * + * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. + * Reading kernel memory may fail due to either invalid address or + * valid address but requiring a major memory fault. If reading kernel memory + * fails, the string for **%s** will be an empty string, and the ip + * address for **%p{i,I}{4,6}** will be 0. Not returning error to + * bpf program is consistent with what bpf_trace_printk() does for now. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EBUSY** Percpu memory copy buffer is busy, can try again + * by returning 1 from bpf program. + * * **-EINVAL** Invalid arguments, or invalid/unsupported formats. + * * **-E2BIG** Too many format specifiers. + * * **-EOVERFLOW** Overflow happens, the same object will be tried again. + * + * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * Description + * seq_write uses seq_file seq_write() to write the data. + * The *m* represents the seq_file. The *data* and *len* represent the + * data to write in bytes. + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EOVERFLOW** Overflow happens, the same object will be tried again. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3204,7 +3239,9 @@ union bpf_attr { FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ FN(sk_assign), \ - FN(ktime_get_boot_ns), + FN(ktime_get_boot_ns), \ + FN(seq_printf), \ + FN(seq_write), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From ab8d78093dfa2e7820ca0c28dda9142aa771c510 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 11 May 2020 17:15:35 +0100 Subject: bpf: Minor fixes to BPF helpers documentation Minor improvements to the documentation for BPF helpers: * Fix formatting for the description of "bpf_socket" for bpf_getsockopt() and bpf_setsockopt(), thus suppressing two warnings from rst2man about "Unexpected indentation". * Fix formatting for return values for bpf_sk_assign() and seq_file helpers. * Fix and harmonise formatting, in particular for function/struct names. * Remove blank lines before "Return:" sections. * Replace tabs found in the middle of text lines. * Fix typos. * Add a note to the footer (in Python script) about "bpftool feature probe", including for listing features available to unprivileged users, and add a reference to bpftool man page. Thanks to Florian for reporting two typos (duplicated words). Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200511161536.29853-4-quentin@isovalent.com --- include/uapi/linux/bpf.h | 109 ++++++++++++++++++++++++--------------------- scripts/bpf_helpers_doc.py | 6 +++ 2 files changed, 65 insertions(+), 50 deletions(-) (limited to 'scripts') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9d1932e23cec..bfb31c1be219 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -675,8 +675,8 @@ union bpf_attr { * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. * - * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel() - * instead. + * Generally, use **bpf_probe_read_user**\ () or + * **bpf_probe_read_kernel**\ () instead. * Return * 0 on success, or a negative error in case of failure. * @@ -684,7 +684,7 @@ union bpf_attr { * Description * Return the time elapsed since system boot, in nanoseconds. * Does not include time the system was suspended. - * See: clock_gettime(CLOCK_MONOTONIC) + * See: **clock_gettime**\ (**CLOCK_MONOTONIC**) * Return * Current *ktime*. * @@ -1543,11 +1543,11 @@ union bpf_attr { * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address - * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for + * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for * more details. * - * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str() - * instead. + * Generally, use **bpf_probe_read_user_str**\ () or + * **bpf_probe_read_kernel_str**\ () instead. * Return * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative @@ -1575,7 +1575,7 @@ union bpf_attr { * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description - * Equivalent to bpf_get_socket_cookie() helper that accepts + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. @@ -1604,6 +1604,7 @@ union bpf_attr { * The option value of length *optlen* is pointed by *optval*. * * *bpf_socket* should be one of the following: + * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** * and **BPF_CGROUP_INET6_CONNECT**. @@ -1672,12 +1673,12 @@ union bpf_attr { * * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be - * one of the XDP program return codes up to XDP_TX, as chosen by - * the caller. Any higher bits in the *flags* argument must be + * one of the XDP program return codes up to **XDP_TX**, as chosen + * by the caller. Any higher bits in the *flags* argument must be * unset. * - * See also bpf_redirect(), which only supports redirecting to an - * ifindex, but doesn't require a map to do so. + * See also **bpf_redirect**\ (), which only supports redirecting + * to an ifindex, but doesn't require a map to do so. * Return * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. @@ -1785,7 +1786,7 @@ union bpf_attr { * the time running for event since last normalization. The * enabled and running times are accumulated since the perf event * open. To achieve scaling factor between two invocations of an - * eBPF program, users can can use CPU id as the key (which is + * eBPF program, users can use CPU id as the key (which is * typical for perf array usage model) to remember the previous * value and do the calculation inside the eBPF program. * Return @@ -1812,6 +1813,7 @@ union bpf_attr { * *opval* and of length *optlen*. * * *bpf_socket* should be one of the following: + * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** * and **BPF_CGROUP_INET6_CONNECT**. @@ -1833,7 +1835,7 @@ union bpf_attr { * The first argument is the context *regs* on which the kprobe * works. * - * This helper works by setting setting the PC (program counter) + * This helper works by setting the PC (program counter) * to an override function which is run in place of the original * probed function. This means the probed function is not run at * all. The replacement function just returns with the required @@ -2300,7 +2302,7 @@ union bpf_attr { * **bpf_rc_keydown**\ () again with the same values, or calling * **bpf_rc_repeat**\ (). * - * Some protocols include a toggle bit, in case the button was + * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into @@ -2646,7 +2648,6 @@ union bpf_attr { * * *th* points to the start of the TCP header, while *th_len* * contains **sizeof**\ (**struct tcphdr**). - * * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. @@ -2829,7 +2830,6 @@ union bpf_attr { * * *th* points to the start of the TCP header, while *th_len* * contains the length of the TCP header. - * * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, @@ -2912,7 +2912,7 @@ union bpf_attr { * // size, after checking its boundaries. * } * - * In comparison, using **bpf_probe_read_user()** helper here + * In comparison, using **bpf_probe_read_user**\ () helper here * instead to read the string would require to estimate the length * at compile time, and would often result in copying more memory * than necessary. @@ -2930,14 +2930,14 @@ union bpf_attr { * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* - * to *dst*. Same semantics as with bpf_probe_read_user_str() apply. + * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. * Return - * On success, the strictly positive length of the string, including + * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description - * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock. + * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. @@ -2965,19 +2965,19 @@ union bpf_attr { * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the - * branch records (struct perf_branch_entry) associated to *ctx* - * and store it in the buffer pointed by *buf* up to size + * branch records (**struct perf_branch_entry**) associated to *ctx* + * and store it in the buffer pointed by *buf* up to size * *size* bytes. * Return * On success, number of bytes written to *buf*. On error, a * negative value. * * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to - * instead return the number of bytes required to store all the + * instead return the number of bytes required to store all the * branch entries. If this flag is set, *buf* may be NULL. * * **-EINVAL** if arguments invalid or **size** not a multiple - * of sizeof(struct perf_branch_entry). + * of **sizeof**\ (**struct perf_branch_entry**\ ). * * **-ENOENT** if architecture does not support branch records. * @@ -2985,8 +2985,8 @@ union bpf_attr { * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. - * - * On failure, the returned value is one of the following: + * Return + * 0 on success, or one of the following in case of failure: * * **-EINVAL** if dev and inum supplied don't match dev_t and inode number * with nsfs of current task, or if dev conversion to dev_t lost high bits. @@ -3025,8 +3025,8 @@ union bpf_attr { * a global identifier that can be assumed unique. If *ctx* is * NULL, then the helper returns the cookie for the initial * network namespace. The cookie itself is very similar to that - * of bpf_get_socket_cookie() helper, but for network namespaces - * instead of sockets. + * of **bpf_get_socket_cookie**\ () helper, but for network + * namespaces instead of sockets. * Return * A 8-byte long opaque number. * @@ -3061,57 +3061,66 @@ union bpf_attr { * * The *flags* argument must be zero. * Return - * 0 on success, or a negative errno in case of failure. + * 0 on success, or a negative error in case of failure: * - * * **-EINVAL** Unsupported flags specified. - * * **-ENOENT** Socket is unavailable for assignment. - * * **-ENETUNREACH** Socket is unreachable (wrong netns). - * * **-EOPNOTSUPP** Unsupported operation, for example a - * call from outside of TC ingress. - * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). + * **-EINVAL** if specified *flags* are not supported. + * + * **-ENOENT** if the socket is unavailable for assignment. + * + * **-ENETUNREACH** if the socket is unreachable (wrong netns). + * + * **-EOPNOTSUPP** if the operation is not supported, for example + * a call from outside of TC ingress. + * + * **-ESOCKTNOSUPPORT** if the socket type is not supported + * (reuseport). * * u64 bpf_ktime_get_boot_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. * Does include the time the system was suspended. - * See: clock_gettime(CLOCK_BOOTTIME) + * See: **clock_gettime**\ (**CLOCK_BOOTTIME**) * Return * Current *ktime*. * * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description - * seq_printf uses seq_file seq_printf() to print out the format string. + * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print + * out the format string. * The *m* represents the seq_file. The *fmt* and *fmt_size* are for * the format string itself. The *data* and *data_len* are format string - * arguments. The *data* are a u64 array and corresponding format string + * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the *data* size in term of bytes. + * The *data_len* is the size of *data* in bytes. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or * valid address but requiring a major memory fault. If reading kernel memory * fails, the string for **%s** will be an empty string, and the ip * address for **%p{i,I}{4,6}** will be 0. Not returning error to - * bpf program is consistent with what bpf_trace_printk() does for now. + * bpf program is consistent with what **bpf_trace_printk**\ () does for now. * Return - * 0 on success, or a negative errno in case of failure. + * 0 on success, or a negative error in case of failure: + * + * **-EBUSY** if per-CPU memory copy buffer is busy, can try again + * by returning 1 from bpf program. + * + * **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported. + * + * **-E2BIG** if *fmt* contains too many format specifiers. * - * * **-EBUSY** Percpu memory copy buffer is busy, can try again - * by returning 1 from bpf program. - * * **-EINVAL** Invalid arguments, or invalid/unsupported formats. - * * **-E2BIG** Too many format specifiers. - * * **-EOVERFLOW** Overflow happens, the same object will be tried again. + * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description - * seq_write uses seq_file seq_write() to write the data. + * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the - * data to write in bytes. + * data to write in bytes. * Return - * 0 on success, or a negative errno in case of failure. + * 0 on success, or a negative error in case of failure: * - * * **-EOVERFLOW** Overflow happens, the same object will be tried again. + * **-EOVERFLOW** if an overflow happened: The same object will be tried again. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index ded304c96a05..91fa668fa860 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -318,6 +318,11 @@ may be interested in: of eBPF maps are used with a given helper function. * *kernel/bpf/* directory contains other files in which additional helpers are defined (for cgroups, sockmaps, etc.). +* The bpftool utility can be used to probe the availability of helper functions + on the system (as well as supported program and map types, and a number of + other parameters). To do so, run **bpftool feature probe** (see + **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to + list features available to unprivileged users. Compatibility between helper functions and program types can generally be found in the files where helper functions are defined. Look for the **struct @@ -338,6 +343,7 @@ SEE ALSO ======== **bpf**\ (2), +**bpftool**\ (8), **cgroups**\ (7), **ip**\ (8), **perf_event_open**\ (2), -- cgit v1.2.3 From 6b9ea5ff5abdcda9d1291d9b8bbad44c93c7ccef Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 May 2020 10:08:07 -0700 Subject: checkpatch: warn about uses of ENOTSUPP ENOTSUPP often feels like the right error code to use, but it's in fact not a standard Unix error. E.g.: $ python >>> import errno >>> errno.errorcode[errno.ENOTSUPP] Traceback (most recent call last): File "", line 1, in AttributeError: module 'errno' has no attribute 'ENOTSUPP' There were numerous commits converting the uses back to EOPNOTSUPP but in some cases we are stuck with the high error code for backward compatibility reasons. Let's try prevent more ENOTSUPPs from getting into the kernel. Recent example: https://lore.kernel.org/netdev/20200510182252.GA411829@lunn.ch/ v3 (Joe): - fix the "not file" condition. v2 (Joe): - add a link to recent discussion, - don't match when scanning files, not patches to avoid sudden influx of conversion patches. https://lore.kernel.org/netdev/20200511165319.2251678-1-kuba@kernel.org/ v1: https://lore.kernel.org/netdev/20200510185148.2230767-1-kuba@kernel.org/ Suggested-by: Andrew Lunn Signed-off-by: Jakub Kicinski Acked-by: Joe Perches Signed-off-by: David S. Miller --- scripts/checkpatch.pl | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'scripts') diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index eac40f0abd56..2be07ed4d70c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4199,6 +4199,17 @@ sub process { "ENOSYS means 'invalid syscall nr' and nothing else\n" . $herecurr); } +# ENOTSUPP is not a standard error code and should be avoided in new patches. +# Folks usually mean EOPNOTSUPP (also called ENOTSUP), when they type ENOTSUPP. +# Similarly to ENOSYS warning a small number of false positives is expected. + if (!$file && $line =~ /\bENOTSUPP\b/) { + if (WARN("ENOTSUPP", + "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\bENOTSUPP\b/EOPNOTSUPP/; + } + } + # function brace can't be on same line, except for #defines of do while, # or if closed on same line if ($perl_version_ok && -- cgit v1.2.3