From 3798680f2fbbe0ca3ab6138b34e0d161c36497ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Nov 2023 13:12:58 +0000 Subject: rxrpc: Fix RTT determination to use any ACK as a source Fix RTT determination to be able to use any type of ACK as the response from which RTT can be calculated provided its ack.serial is non-zero and matches the serial number of an outgoing DATA or ACK packet. This shouldn't be limited to REQUESTED-type ACKs as these can have other types substituted for them for things like duplicate or out-of-order packets. Fixes: 4700c4d80b7b ("rxrpc: Fix loss of RTT samples due to interposed ACK") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 4c53a5ef6257..f7e537f64db4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -328,7 +328,7 @@ E_(rxrpc_rtt_tx_ping, "PING") #define rxrpc_rtt_rx_traces \ - EM(rxrpc_rtt_rx_cancel, "CNCL") \ + EM(rxrpc_rtt_rx_other_ack, "OACK") \ EM(rxrpc_rtt_rx_obsolete, "OBSL") \ EM(rxrpc_rtt_rx_lost, "LOST") \ EM(rxrpc_rtt_rx_ping_response, "PONG") \ -- cgit v1.2.3 From 79e0c5be8c73a674c92bd4ba77b75f4f8c91d32e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:13 +0100 Subject: net, vrf: Move dstats structure to core Just move struct pcpu_dstats out of the vrf into the core, and streamline the field names slightly, so they better align with the {t,l}stats ones. No functional change otherwise. A conversion of the u64s to u64_stats_t could be done at a separate point in future. This move is needed as we are moving the {t,l,d}stats allocation/freeing to the core. Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: Jakub Kicinski Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-2-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/vrf.c | 24 +++++++----------------- include/linux/netdevice.h | 10 ++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index db766941b78f..3e6e0fdc3ba7 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -121,22 +121,12 @@ struct net_vrf { int ifindex; }; -struct pcpu_dstats { - u64 tx_pkts; - u64 tx_bytes; - u64 tx_drps; - u64 rx_pkts; - u64 rx_bytes; - u64 rx_drps; - struct u64_stats_sync syncp; -}; - static void vrf_rx_stats(struct net_device *dev, int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->rx_pkts++; + dstats->rx_packets++; dstats->rx_bytes += len; u64_stats_update_end(&dstats->syncp); } @@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev, do { start = u64_stats_fetch_begin(&dstats->syncp); tbytes = dstats->tx_bytes; - tpkts = dstats->tx_pkts; - tdrops = dstats->tx_drps; + tpkts = dstats->tx_packets; + tdrops = dstats->tx_drops; rbytes = dstats->rx_bytes; - rpkts = dstats->rx_pkts; + rpkts = dstats->rx_packets; } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; @@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) vrf_rx_stats(dev, len); else - this_cpu_inc(dev->dstats->rx_drps); + this_cpu_inc(dev->dstats->rx_drops); return NETDEV_TX_OK; } @@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->tx_pkts++; + dstats->tx_packets++; dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { - this_cpu_inc(dev->dstats->tx_drps); + this_cpu_inc(dev->dstats->tx_drops); } return ret; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a16c9cc063fe..98082113156e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2755,6 +2755,16 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); +struct pcpu_dstats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_drops; + u64 tx_packets; + u64 tx_bytes; + u64 tx_drops; + struct u64_stats_sync syncp; +} __aligned(8 * sizeof(u64)); + struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; -- cgit v1.2.3 From 34d21de99cea9cb17967874313e5b0262527833c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:14 +0100 Subject: net: Move {l,t,d}stats allocation to core and convert veth & vrf Move {l,t,d}stats allocation to the core and let netdevs pick the stats type they need. That way the driver doesn't have to bother with error handling (allocation failure checking, making sure free happens in the right spot, etc) - all happening in the core. Co-developed-by: Jakub Kicinski Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Cc: David Ahern Link: https://lore.kernel.org/r/20231114004220.6495-3-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/veth.c | 16 ++-------------- drivers/net/vrf.c | 14 +++----------- include/linux/netdevice.h | 20 +++++++++++++++---- net/core/dev.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 69 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 9980517ed8b0..ac030c241d1a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1506,25 +1506,12 @@ static void veth_free_queues(struct net_device *dev) static int veth_dev_init(struct net_device *dev) { - int err; - - dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); - if (!dev->lstats) - return -ENOMEM; - - err = veth_alloc_queues(dev); - if (err) { - free_percpu(dev->lstats); - return err; - } - - return 0; + return veth_alloc_queues(dev); } static void veth_dev_free(struct net_device *dev) { veth_free_queues(dev); - free_percpu(dev->lstats); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1796,6 +1783,7 @@ static void veth_setup(struct net_device *dev) NETIF_F_HW_VLAN_STAG_RX); dev->needs_free_netdev = true; dev->priv_destructor = veth_dev_free; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; dev->max_mtu = ETH_MAX_MTU; dev->hw_features = VETH_FEATURES; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3e6e0fdc3ba7..bb95ce43cd97 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1164,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev) vrf_rtable_release(dev, vrf); vrf_rt6_release(dev, vrf); - - free_percpu(dev->dstats); - dev->dstats = NULL; } static int vrf_dev_init(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); - dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); - if (!dev->dstats) - goto out_nomem; - /* create the default dst which points back to us */ if (vrf_rtable_create(dev) != 0) - goto out_stats; + goto out_nomem; if (vrf_rt6_create(dev) != 0) goto out_rth; @@ -1193,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev) out_rth: vrf_rtable_release(dev, vrf); -out_stats: - free_percpu(dev->dstats); - dev->dstats = NULL; out_nomem: return -ENOMEM; } @@ -1694,6 +1684,8 @@ static void vrf_setup(struct net_device *dev) dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU; dev->mtu = dev->max_mtu; + + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 98082113156e..2564e209465e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type { ML_PRIV_CAN, }; +enum netdev_stat_type { + NETDEV_PCPU_STAT_NONE, + NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ + NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ + NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ +}; + /** * struct net_device - The DEVICE structure. * @@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type { * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type - * @lstats: Loopback statistics - * @tstats: Tunnel statistics - * @dstats: Dummy statistics - * @vstats: Virtual ethernet statistics + * + * @pcpu_stat_type: Type of device statistics which the core should + * allocate/free: none, lstats, tstats, dstats. none + * means the driver is handling statistics allocation/ + * freeing internally. + * @lstats: Loopback statistics: packets, bytes + * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes + * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP @@ -2354,6 +2365,7 @@ struct net_device { void *ml_priv; enum netdev_ml_priv_type ml_priv_type; + enum netdev_stat_type pcpu_stat_type:8; union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; diff --git a/net/core/dev.c b/net/core/dev.c index af53f6d838ce..0cc6e283edba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10051,6 +10051,46 @@ void netif_tx_stop_all_queues(struct net_device *dev) } EXPORT_SYMBOL(netif_tx_stop_all_queues); +static int netdev_do_alloc_pcpu_stats(struct net_device *dev) +{ + void __percpu *v; + + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return 0; + case NETDEV_PCPU_STAT_LSTATS: + v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); + break; + default: + return -EINVAL; + } + + return v ? 0 : -ENOMEM; +} + +static void netdev_do_free_pcpu_stats(struct net_device *dev) +{ + switch (dev->pcpu_stat_type) { + case NETDEV_PCPU_STAT_NONE: + return; + case NETDEV_PCPU_STAT_LSTATS: + free_percpu(dev->lstats); + break; + case NETDEV_PCPU_STAT_TSTATS: + free_percpu(dev->tstats); + break; + case NETDEV_PCPU_STAT_DSTATS: + free_percpu(dev->dstats); + break; + } +} + /** * register_netdevice() - register a network device * @dev: device to register @@ -10111,9 +10151,13 @@ int register_netdevice(struct net_device *dev) goto err_uninit; } + ret = netdev_do_alloc_pcpu_stats(dev); + if (ret) + goto err_uninit; + ret = dev_index_reserve(net, dev->ifindex); if (ret < 0) - goto err_uninit; + goto err_free_pcpu; dev->ifindex = ret; /* Transfer changeable features to wanted_features and enable @@ -10219,6 +10263,8 @@ err_uninit_notify: call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); err_ifindex_release: dev_index_release(net, dev->ifindex); +err_free_pcpu: + netdev_do_free_pcpu_stats(dev); err_uninit: if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); @@ -10471,6 +10517,7 @@ void netdev_run_todo(void) WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); + netdev_do_free_pcpu_stats(dev); if (dev->priv_destructor) dev->priv_destructor(dev); if (dev->needs_free_netdev) -- cgit v1.2.3 From 2c225425704078282e152ba692649237f78b3d7a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 14 Nov 2023 01:42:18 +0100 Subject: bpf, netkit: Add indirect call wrapper for fetching peer dev ndo_get_peer_dev is used in tcx BPF fast path, therefore make use of indirect call wrapper and therefore optimize the bpf_redirect_peer() internal handling a bit. Add a small skb_get_peer_dev() wrapper which utilizes the INDIRECT_CALL_1() macro instead of open coding. Future work could potentially add a peer pointer directly into struct net_device in future and convert veth and netkit over to use it so that eventually ndo_get_peer_dev can be removed. Co-developed-by: Nikolay Aleksandrov Signed-off-by: Nikolay Aleksandrov Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231114004220.6495-7-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- drivers/net/netkit.c | 3 ++- include/net/netkit.h | 6 ++++++ net/core/filter.c | 18 +++++++++++++----- 3 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 99de11f9cde5..97bd6705c241 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,7 @@ out: rcu_read_unlock(); } -static struct net_device *netkit_peer_dev(struct net_device *dev) +INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev) { return rcu_dereference(netkit_priv(dev)->peer); } diff --git a/include/net/netkit.h b/include/net/netkit.h index 0ba2e6b847ca..9ec0163739f4 100644 --- a/include/net/netkit.h +++ b/include/net/netkit.h @@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog); int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev)); #else static inline int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr, { return -EINVAL; } + +static inline struct net_device *netkit_peer_dev(struct net_device *dev) +{ + return NULL; +} #endif /* CONFIG_NETKIT */ #endif /* __NET_NETKIT_H */ diff --git a/net/core/filter.c b/net/core/filter.c index cca810987c8d..7e4d7c3bcc84 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include "dev.h" @@ -2468,6 +2469,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); +static struct net_device *skb_get_peer_dev(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (likely(ops->ndo_get_peer_dev)) + return INDIRECT_CALL_1(ops->ndo_get_peer_dev, + netkit_peer_dev, dev); + return NULL; +} + int skb_do_redirect(struct sk_buff *skb) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -2481,12 +2492,9 @@ int skb_do_redirect(struct sk_buff *skb) if (unlikely(!dev)) goto out_drop; if (flags & BPF_F_PEER) { - const struct net_device_ops *ops = dev->netdev_ops; - - if (unlikely(!ops->ndo_get_peer_dev || - !skb_at_tc_ingress(skb))) + if (unlikely(!skb_at_tc_ingress(skb))) goto out_drop; - dev = ops->ndo_get_peer_dev(dev); + dev = skb_get_peer_dev(dev); if (unlikely(!dev || !(dev->flags & IFF_UP) || net_eq(net, dev_net(dev)))) -- cgit v1.2.3 From ab5cfac139ab8576fb54630d4cca23c3e690ee90 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:06:56 +0200 Subject: bpf: verify callbacks as if they are called unknown number of times Prior to this patch callbacks were handled as regular function calls, execution of callback body was modeled exactly once. This patch updates callbacks handling logic as follows: - introduces a function push_callback_call() that schedules callback body verification in env->head stack; - updates prepare_func_exit() to reschedule callback body verification upon BPF_EXIT; - as calls to bpf_*_iter_next(), calls to callback invoking functions are marked as checkpoints; - is_state_visited() is updated to stop callback based iteration when some identical parent state is found. Paths with callback function invoked zero times are now verified first, which leads to necessity to modify some selftests: - the following negative tests required adding release/unlock/drop calls to avoid previously masked unrelated error reports: - cb_refs.c:underflow_prog - exceptions_fail.c:reject_rbtree_add_throw - exceptions_fail.c:reject_with_cp_reference - the following precision tracking selftests needed change in expected log trace: - verifier_subprog_precision.c:callback_result_precise (note: r0 precision is no longer propagated inside callback and I think this is a correct behavior) - verifier_subprog_precision.c:parent_callee_saved_reg_precise_with_callback - verifier_subprog_precision.c:parent_stack_slot_precise_with_callback Reported-by: Andrew Werner Closes: https://lore.kernel.org/bpf/CA+vRuzPChFNXmouzGG+wsy=6eMcfr1mFG0F3g7rbg-sedGKW3w@mail.gmail.com/ Acked-by: Andrii Nakryiko Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-7-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 5 + kernel/bpf/verifier.c | 274 +++++++++++++-------- tools/testing/selftests/bpf/progs/cb_refs.c | 1 + .../testing/selftests/bpf/progs/exceptions_fail.c | 2 + .../bpf/progs/verifier_subprog_precision.c | 71 ++++-- 5 files changed, 240 insertions(+), 113 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 24213a99cc79..dd326936dd6f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -400,6 +400,7 @@ struct bpf_verifier_state { struct bpf_idx_pair *jmp_history; u32 jmp_history_cnt; u32 dfs_depth; + u32 callback_unroll_depth; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -511,6 +512,10 @@ struct bpf_insn_aux_data { * this instruction, regardless of any heuristics */ bool force_checkpoint; + /* true if instruction is a call to a helper function that + * accepts callback function as a parameter. + */ + bool calls_callback; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 68ee4803d3a2..a60dfa56ebb3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -547,13 +547,12 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_dynptr_data; } -static bool is_callback_calling_kfunc(u32 btf_id); +static bool is_sync_callback_calling_kfunc(u32 btf_id); static bool is_bpf_throw_kfunc(struct bpf_insn *insn); -static bool is_callback_calling_function(enum bpf_func_id func_id) +static bool is_sync_callback_calling_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_for_each_map_elem || - func_id == BPF_FUNC_timer_set_callback || func_id == BPF_FUNC_find_vma || func_id == BPF_FUNC_loop || func_id == BPF_FUNC_user_ringbuf_drain; @@ -564,6 +563,18 @@ static bool is_async_callback_calling_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_timer_set_callback; } +static bool is_callback_calling_function(enum bpf_func_id func_id) +{ + return is_sync_callback_calling_function(func_id) || + is_async_callback_calling_function(func_id); +} + +static bool is_sync_callback_calling_insn(struct bpf_insn *insn) +{ + return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) || + (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); +} + static bool is_storage_get_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_storage_get || @@ -1808,6 +1819,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->first_insn_idx = src->first_insn_idx; dst_state->last_insn_idx = src->last_insn_idx; dst_state->dfs_depth = src->dfs_depth; + dst_state->callback_unroll_depth = src->callback_unroll_depth; dst_state->used_as_loop_entry = src->used_as_loop_entry; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; @@ -3731,6 +3743,8 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) } } +static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. @@ -3906,16 +3920,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, return -EFAULT; return 0; } - } else if ((bpf_helper_call(insn) && - is_callback_calling_function(insn->imm) && - !is_async_callback_calling_function(insn->imm)) || - (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) { - /* callback-calling helper or kfunc call, which means - * we are exiting from subprog, but unlike the subprog - * call handling above, we shouldn't propagate - * precision of r1-r5 (if any requested), as they are - * not actually arguments passed directly to callback - * subprogs + } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) { + /* exit from callback subprog to callback-calling helper or + * kfunc call. Use idx/subseq_idx check to discern it from + * straight line code backtracking. + * Unlike the subprog call handling above, we shouldn't + * propagate precision of r1-r5 (if any requested), as they are + * not actually arguments passed directly to callback subprogs */ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); @@ -3950,10 +3961,18 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, } else if (opcode == BPF_EXIT) { bool r0_precise; + /* Backtracking to a nested function call, 'idx' is a part of + * the inner frame 'subseq_idx' is a part of the outer frame. + * In case of a regular function call, instructions giving + * precision to registers R1-R5 should have been found already. + * In case of a callback, it is ok to have R1-R5 marked for + * backtracking, as these registers are set by the function + * invoking callback. + */ + if (subseq_idx >= 0 && calls_callback(env, subseq_idx)) + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + bt_clear_reg(bt, i); if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { - /* if backtracing was looking for registers R1-R5 - * they should have been found already. - */ verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; @@ -9421,11 +9440,11 @@ err_out: return err; } -static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx, int subprog, - set_callee_state_fn set_callee_state_cb) +static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) { - struct bpf_verifier_state *state = env->cur_state; + struct bpf_verifier_state *state = env->cur_state, *callback_state; struct bpf_func_state *caller, *callee; int err; @@ -9433,44 +9452,22 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = btf_check_subprog_call(env, subprog, caller->regs); if (err == -EFAULT) return err; - if (subprog_is_global(env, subprog)) { - if (err) { - verbose(env, "Caller passes invalid args into func#%d\n", - subprog); - return err; - } else { - if (env->log.level & BPF_LOG_LEVEL) - verbose(env, - "Func#%d is global and valid. Skipping.\n", - subprog); - clear_caller_saved_regs(env, caller->regs); - - /* All global functions return a 64-bit SCALAR_VALUE */ - mark_reg_unknown(env, caller->regs, BPF_REG_0); - caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; - - /* continue with next insn after call */ - return 0; - } - } /* set_callee_state is used for direct subprog calls, but we are * interested in validating only BPF helpers that can call subprogs as * callbacks */ - if (set_callee_state_cb != set_callee_state) { - env->subprog_info[subprog].is_cb = true; - if (bpf_pseudo_kfunc_call(insn) && - !is_callback_calling_kfunc(insn->imm)) { - verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); - return -EFAULT; - } else if (!bpf_pseudo_kfunc_call(insn) && - !is_callback_calling_function(insn->imm)) { /* helper */ - verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", - func_id_name(insn->imm), insn->imm); - return -EFAULT; - } + env->subprog_info[subprog].is_cb = true; + if (bpf_pseudo_kfunc_call(insn) && + !is_sync_callback_calling_kfunc(insn->imm)) { + verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; + } else if (!bpf_pseudo_kfunc_call(insn) && + !is_callback_calling_function(insn->imm)) { /* helper */ + verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", + func_id_name(insn->imm), insn->imm); + return -EFAULT; } if (insn->code == (BPF_JMP | BPF_CALL) && @@ -9481,25 +9478,76 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* there is no real recursion here. timer callbacks are async */ env->subprog_info[subprog].is_async_cb = true; async_cb = push_async_cb(env, env->subprog_info[subprog].start, - *insn_idx, subprog); + insn_idx, subprog); if (!async_cb) return -EFAULT; callee = async_cb->frame[0]; callee->async_entry_cnt = caller->async_entry_cnt + 1; /* Convert bpf_timer_set_callback() args into timer callback args */ - err = set_callee_state_cb(env, caller, callee, *insn_idx); + err = set_callee_state_cb(env, caller, callee, insn_idx); if (err) return err; + return 0; + } + + /* for callback functions enqueue entry to callback and + * proceed with next instruction within current frame. + */ + callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false); + if (!callback_state) + return -ENOMEM; + + err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb, + callback_state); + if (err) + return err; + + callback_state->callback_unroll_depth++; + return 0; +} + +static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *caller; + int err, subprog, target_insn; + + target_insn = *insn_idx + insn->imm + 1; + subprog = find_subprog(env, target_insn); + if (subprog < 0) { + verbose(env, "verifier bug. No program starts at insn %d\n", target_insn); + return -EFAULT; + } + + caller = state->frame[state->curframe]; + err = btf_check_subprog_call(env, subprog, caller->regs); + if (err == -EFAULT) + return err; + if (subprog_is_global(env, subprog)) { + if (err) { + verbose(env, "Caller passes invalid args into func#%d\n", subprog); + return err; + } + + if (env->log.level & BPF_LOG_LEVEL) + verbose(env, "Func#%d is global and valid. Skipping.\n", subprog); clear_caller_saved_regs(env, caller->regs); + + /* All global functions return a 64-bit SCALAR_VALUE */ mark_reg_unknown(env, caller->regs, BPF_REG_0); caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + /* continue with next insn after call */ return 0; } - err = setup_func_entry(env, subprog, *insn_idx, set_callee_state_cb, state); + /* for regular function entry setup new frame and continue + * from that frame. + */ + err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state); if (err) return err; @@ -9559,22 +9607,6 @@ static int set_callee_state(struct bpf_verifier_env *env, return 0; } -static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx) -{ - int subprog, target_insn; - - target_insn = *insn_idx + insn->imm + 1; - subprog = find_subprog(env, target_insn); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn); - return -EFAULT; - } - - return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); -} - static int set_map_elem_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -9798,6 +9830,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); return -EINVAL; } + if (!calls_callback(env, callee->callsite)) { + verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n", + *insn_idx, callee->callsite); + return -EFAULT; + } } else { /* return to the caller whatever r0 had in the callee */ caller->regs[BPF_REG_0] = *r0; @@ -9815,7 +9852,15 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return err; } - *insn_idx = callee->callsite + 1; + /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, + * there function call logic would reschedule callback visit. If iteration + * converges is_state_visited() would prune that visit eventually. + */ + if (callee->in_callback_fn) + *insn_idx = callee->callsite; + else + *insn_idx = callee->callsite + 1; + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); print_verifier_state(env, callee, true); @@ -10228,24 +10273,24 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } break; case BPF_FUNC_for_each_map_elem: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_map_elem_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_map_elem_callback_state); break; case BPF_FUNC_timer_set_callback: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_timer_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_timer_callback_state); break; case BPF_FUNC_find_vma: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_find_vma_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_find_vma_callback_state); break; case BPF_FUNC_snprintf: err = check_bpf_snprintf_call(env, regs); break; case BPF_FUNC_loop: update_loop_inline_state(env, meta.subprogno); - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_loop_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_loop_callback_state); break; case BPF_FUNC_dynptr_from_mem: if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { @@ -10341,8 +10386,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; } case BPF_FUNC_user_ringbuf_drain: - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_user_ringbuf_callback_state); + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_user_ringbuf_callback_state); break; } @@ -11230,7 +11275,7 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id) btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; } -static bool is_callback_calling_kfunc(u32 btf_id) +static bool is_sync_callback_calling_kfunc(u32 btf_id) { return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; } @@ -11982,6 +12027,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EACCES; } + /* Check the arguments */ + err = check_kfunc_args(env, &meta, insn_idx); + if (err < 0) + return err; + + if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_rbtree_add_callback_state); + if (err) { + verbose(env, "kfunc %s#%d failed callback verification\n", + func_name, meta.func_id); + return err; + } + } + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); @@ -12017,10 +12077,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } - /* Check the arguments */ - err = check_kfunc_args(env, &meta, insn_idx); - if (err < 0) - return err; /* In case of release function, we get register number of refcounted * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ @@ -12054,16 +12110,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } - if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { - err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, - set_rbtree_add_callback_state); - if (err) { - verbose(env, "kfunc %s#%d failed callback verification\n", - func_name, meta.func_id); - return err; - } - } - if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { if (!bpf_jit_supports_exceptions()) { verbose(env, "JIT does not support calling kfunc %s#%d\n", @@ -15427,6 +15473,15 @@ static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx) return env->insn_aux_data[insn_idx].force_checkpoint; } +static void mark_calls_callback(struct bpf_verifier_env *env, int idx) +{ + env->insn_aux_data[idx].calls_callback = true; +} + +static bool calls_callback(struct bpf_verifier_env *env, int insn_idx) +{ + return env->insn_aux_data[insn_idx].calls_callback; +} enum { DONE_EXPLORING = 0, @@ -15540,6 +15595,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env) * async state will be pushed for further exploration. */ mark_prune_point(env, t); + /* For functions that invoke callbacks it is not known how many times + * callback would be called. Verifier models callback calling functions + * by repeatedly visiting callback bodies and returning to origin call + * instruction. + * In order to stop such iteration verifier needs to identify when a + * state identical some state from a previous iteration is reached. + * Check below forces creation of checkpoint before callback calling + * instruction to allow search for such identical states. + */ + if (is_sync_callback_calling_insn(insn)) { + mark_calls_callback(env, t); + mark_force_checkpoint(env, t); + mark_prune_point(env, t); + mark_jmp_point(env, t); + } if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; @@ -17009,10 +17079,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) } goto skip_inf_loop_check; } + if (calls_callback(env, insn_idx)) { + if (states_equal(env, &sl->state, cur, true)) + goto hit; + goto skip_inf_loop_check; + } /* attempt to detect infinite loop to avoid unnecessary doomed work */ if (states_maybe_looping(&sl->state, cur) && states_equal(env, &sl->state, cur, false) && - !iter_active_depths_differ(&sl->state, cur)) { + !iter_active_depths_differ(&sl->state, cur) && + sl->state.callback_unroll_depth == cur->callback_unroll_depth) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); verbose(env, "cur state:"); diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 76d661b20e87..56c764df8196 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -33,6 +33,7 @@ int underflow_prog(void *ctx) if (!p) return 0; bpf_for_each_map_elem(&array_map, cb1, &p, 0); + bpf_kfunc_call_test_release(p); return 0; } diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 4c39e920dac2..8c0ef2742208 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx) return 0; bpf_spin_lock(&lock); bpf_rbtree_add(&rbtree, &f->node, rbless); + bpf_spin_unlock(&lock); return 0; } @@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx) if (!f) return 0; bpf_loop(5, subprog_cb_ref, NULL, 0); + bpf_obj_drop(f); return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index db6b3143338b..da803cffb5ef 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,15 +119,26 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") -__msg("mark_precise: frame0: parent state regs=r0 stack=:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop") +/* State entering callback body popped from states stack */ +__msg("from 9 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r4 (flags) is always precise for bpf_loop() */ +__msg("frame 0: propagating r4") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit") +__msg("from 18 to 9: safe") __naked int callback_result_precise(void) { asm volatile ( @@ -233,20 +244,36 @@ __naked int parent_callee_saved_reg_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("12: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: last_idx 12 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs=r6 stack=:") -__msg("mark_precise: frame0: last_idx 16 first_idx 0") -__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 9 to 15: frame1:") +__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("15: (b7) r0 = 0") +__msg("16: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r4 (flags) is always precise for bpf_loop(), + * r6 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r4,r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 16 to 9: safe") __naked int parent_callee_saved_reg_precise_with_callback(void) { asm volatile ( @@ -373,22 +400,38 @@ __naked int parent_stack_slot_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10") __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 10 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 10:") +/* r4 (flags) is always precise for bpf_loop(), + * fp-8 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r4,fp-8") +__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") +__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 18 to 10: safe") __naked int parent_stack_slot_precise_with_callback(void) { asm volatile ( -- cgit v1.2.3 From bb124da69c47dd98d69361ec13244ece50bec63e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Nov 2023 04:07:00 +0200 Subject: bpf: keep track of max number of bpf_loop callback iterations In some cases verifier can't infer convergence of the bpf_loop() iteration. E.g. for the following program: static int cb(__u32 idx, struct num_context* ctx) { ctx->i++; return 0; } SEC("?raw_tp") int prog(void *_) { struct num_context ctx = { .i = 0 }; __u8 choice_arr[2] = { 0, 1 }; bpf_loop(2, cb, &ctx, 0); return choice_arr[ctx.i]; } Each 'cb' simulation would eventually return to 'prog' and reach 'return choice_arr[ctx.i]' statement. At which point ctx.i would be marked precise, thus forcing verifier to track multitude of separate states with {.i=0}, {.i=1}, ... at bpf_loop() callback entry. This commit allows "brute force" handling for such cases by limiting number of callback body simulations using 'umax' value of the first bpf_loop() parameter. For this, extend bpf_func_state with 'callback_depth' field. Increment this field when callback visiting state is pushed to states traversal stack. For frame #N it's 'callback_depth' field counts how many times callback with frame depth N+1 had been executed. Use bpf_func_state specifically to allow independent tracking of callback depths when multiple nested bpf_loop() calls are present. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231121020701.26440-11-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 11 +++++++ kernel/bpf/verifier.c | 19 ++++++++++-- .../bpf/progs/verifier_subprog_precision.c | 35 +++++++++++++++------- 3 files changed, 53 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index dd326936dd6f..aa4d19d0bc94 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -301,6 +301,17 @@ struct bpf_func_state { struct tnum callback_ret_range; bool in_async_callback_fn; bool in_exception_callback_fn; + /* For callback calling functions that limit number of possible + * callback executions (e.g. bpf_loop) keeps track of current + * simulated iteration number. + * Value in frame N refers to number of times callback with frame + * N+1 was simulated, e.g. for the following call: + * + * bpf_loop(..., fn, ...); | suppose current frame is N + * | fn would be simulated in frame N+1 + * | number of simulations is tracked in frame N + */ + u32 callback_depth; /* The following fields should be last. See copy_func_state() */ int acquired_refs; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2f03e6b11bb9..af2819d5c8ee 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9505,6 +9505,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins return err; callback_state->callback_unroll_depth++; + callback_state->frame[callback_state->curframe - 1]->callback_depth++; + caller->callback_depth = 0; return 0; } @@ -10309,8 +10311,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn break; case BPF_FUNC_loop: update_loop_inline_state(env, meta.subprogno); - err = push_callback_call(env, insn, insn_idx, meta.subprogno, - set_loop_callback_state); + /* Verifier relies on R1 value to determine if bpf_loop() iteration + * is finished, thus mark it precise. + */ + err = mark_chain_precision(env, BPF_REG_1); + if (err) + return err; + if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_loop_callback_state); + } else { + cur_func(env)->callback_depth = 0; + if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "frame%d bpf_loop iteration limit reached\n", + env->cur_state->curframe); + } break; case BPF_FUNC_dynptr_from_mem: if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index da803cffb5ef..f61d623b1ce8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,7 +119,23 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) -/* First simulated path does not include callback body */ +/* First simulated path does not include callback body, + * r1 and r4 are always precise for bpf_loop() calls. + */ +__msg("9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r4 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r1 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* r6 precision propagation */ __msg("14: (0f) r1 += r6") __msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") @@ -134,10 +150,9 @@ __msg("17: (b7) r0 = 0") __msg("18: (95) exit") __msg("returning from callee:") __msg("to caller at 9:") -/* r4 (flags) is always precise for bpf_loop() */ -__msg("frame 0: propagating r4") +__msg("frame 0: propagating r1,r4") __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") -__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit") __msg("from 18 to 9: safe") __naked int callback_result_precise(void) { @@ -264,12 +279,12 @@ __msg("15: (b7) r0 = 0") __msg("16: (95) exit") __msg("returning from callee:") __msg("to caller at 9:") -/* r4 (flags) is always precise for bpf_loop(), +/* r1, r4 are always precise for bpf_loop(), * r6 was marked before backtracking to callback body. */ -__msg("frame 0: propagating r4,r6") +__msg("frame 0: propagating r1,r4,r6") __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") -__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit") __msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") __msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=:") @@ -422,12 +437,12 @@ __msg("17: (b7) r0 = 0") __msg("18: (95) exit") __msg("returning from callee:") __msg("to caller at 10:") -/* r4 (flags) is always precise for bpf_loop(), +/* r1, r4 are always precise for bpf_loop(), * fp-8 was marked before backtracking to callback body. */ -__msg("frame 0: propagating r4,fp-8") +__msg("frame 0: propagating r1,r4,fp-8") __msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") -__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit") __msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") __msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") __msg("mark_precise: frame0: parent state regs= stack=:") -- cgit v1.2.3