summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-11-22 02:49:30 +0300
committerJakub Kicinski <kuba@kernel.org>2023-11-22 02:49:31 +0300
commitb2d66643dcf2c395207f9373c624e0ab32166e57 (patch)
treea9be214abb19a9f0f41a6ad2c8b42a39a64bd363 /tools
parent495ec91b48e489afefb2ad714f0d9b68c3016c6c (diff)
parentacb12c859ac7c36d6d7632280fd1e263188cb07f (diff)
downloadlinux-b2d66643dcf2c395207f9373c624e0ab32166e57.tar.xz
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says: ==================== pull-request: bpf 2023-11-21 We've added 19 non-merge commits during the last 4 day(s) which contain a total of 18 files changed, 1043 insertions(+), 416 deletions(-). The main changes are: 1) Fix BPF verifier to validate callbacks as if they are called an unknown number of times in order to fix not detecting some unsafe programs, from Eduard Zingerman. 2) Fix bpf_redirect_peer() handling which missed proper stats accounting for veth and netkit and also generally fix missing stats for the latter, from Peilin Ye, Daniel Borkmann et al. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: selftests/bpf: check if max number of bpf_loop iterations is tracked bpf: keep track of max number of bpf_loop callback iterations selftests/bpf: test widening for iterating callbacks bpf: widening for callback iterators selftests/bpf: tests for iterating callbacks bpf: verify callbacks as if they are called unknown number of times bpf: extract setup_func_entry() utility function bpf: extract __check_reg_arg() utility function selftests/bpf: fix bpf_loop_bench for new callback verification scheme selftests/bpf: track string payload offset as scalar in strobemeta selftests/bpf: track tcp payload offset as scalar in xdp_synproxy selftests/bpf: Add netkit to tc_redirect selftest selftests/bpf: De-veth-ize the tc_redirect test case bpf, netkit: Add indirect call wrapper for fetching peer dev bpf: Fix dev's rx stats for bpf_redirect_peer traffic veth: Use tstats per-CPU traffic counters netkit: Add tstats per-CPU traffic counters net: Move {l,t,d}stats allocation to core and convert veth & vrf net, vrf: Move dstats structure to core ==================== Link: https://lore.kernel.org/r/20231121193113.11796-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c317
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop_bench.c13
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c1
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h78
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c242
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c86
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c84
9 files changed, 617 insertions, 208 deletions
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 6ee22c3b251a..518f143c5b0f 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -24,6 +24,7 @@
#include "test_progs.h"
#include "network_helpers.h"
+#include "netlink_helpers.h"
#include "test_tc_neigh_fib.skel.h"
#include "test_tc_neigh.skel.h"
#include "test_tc_peer.skel.h"
@@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb)
}
}
+enum dev_mode {
+ MODE_VETH,
+ MODE_NETKIT,
+};
+
struct netns_setup_result {
- int ifindex_veth_src;
- int ifindex_veth_src_fwd;
- int ifindex_veth_dst;
- int ifindex_veth_dst_fwd;
+ enum dev_mode dev_mode;
+ int ifindex_src;
+ int ifindex_src_fwd;
+ int ifindex_dst;
+ int ifindex_dst_fwd;
};
static int get_ifaddr(const char *name, char *ifaddr)
@@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr)
return 0;
}
+static int create_netkit(int mode, char *prim, char *peer)
+{
+ struct rtattr *linkinfo, *data, *peer_info;
+ struct rtnl_handle rth = { .fd = -1 };
+ const char *type = "netkit";
+ struct {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+ } req = {};
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+ req.i.ifi_family = AF_UNSPEC;
+
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
+ linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+ addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+ data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
+ req.n.nlmsg_len += sizeof(struct ifinfomsg);
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
+ addattr_nest_end(&req.n, peer_info);
+ addattr_nest_end(&req.n, data);
+ addattr_nest_end(&req.n, linkinfo);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
struct nstoken *nstoken = NULL;
- char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-
- SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
- SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
+ char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+ int err;
- SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
- SYS(fail, "ip link set veth_dst address " MAC_DST);
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip link add src type veth peer name src_fwd");
+ SYS(fail, "ip link add dst type veth peer name dst_fwd");
+
+ SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
+ SYS(fail, "ip link set dst address " MAC_DST);
+ } else if (result->dev_mode == MODE_NETKIT) {
+ err = create_netkit(NETKIT_L3, "src", "src_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_src"))
+ goto fail;
+ err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_dst"))
+ goto fail;
+ }
- if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+ if (get_ifaddr("src_fwd", src_fwd_addr))
goto fail;
- result->ifindex_veth_src = if_nametoindex("veth_src");
- if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
+ result->ifindex_src = if_nametoindex("src");
+ if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
goto fail;
- result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
- if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
+ result->ifindex_src_fwd = if_nametoindex("src_fwd");
+ if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
goto fail;
- result->ifindex_veth_dst = if_nametoindex("veth_dst");
- if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
+ result->ifindex_dst = if_nametoindex("dst");
+ if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
goto fail;
- result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
- if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
+ result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
+ if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
goto fail;
- SYS(fail, "ip link set veth_src netns " NS_SRC);
- SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
- SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
- SYS(fail, "ip link set veth_dst netns " NS_DST);
+ SYS(fail, "ip link set src netns " NS_SRC);
+ SYS(fail, "ip link set src_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst netns " NS_DST);
/** setup in 'src' namespace */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
- SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
- SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
- SYS(fail, "ip link set dev veth_src up");
+ SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
+ SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
+ SYS(fail, "ip link set dev src up");
- SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
- SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
- SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
- SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
- veth_src_fwd_addr);
- SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
- veth_src_fwd_addr);
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
+ src_fwd_addr);
+ SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
+ src_fwd_addr);
+ }
close_netns(nstoken);
@@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
* needs v4 one in order to start ARP probing. IP4_NET route is added
* to the endpoints so that the ARP processing will reply.
*/
- SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
- SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
- SYS(fail, "ip link set dev veth_src_fwd up");
- SYS(fail, "ip link set dev veth_dst_fwd up");
+ SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
+ SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
+ SYS(fail, "ip link set dev src_fwd up");
+ SYS(fail, "ip link set dev dst_fwd up");
- SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
- SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
- SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
- SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
close_netns(nstoken);
@@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
- SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
- SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
- SYS(fail, "ip link set dev veth_dst up");
+ SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
+ SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
+ SYS(fail, "ip link set dev dst up");
- SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
- SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
- SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
- SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
+ }
close_netns(nstoken);
@@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog,
const struct bpf_program *chk_prog,
const struct netns_setup_result *setup_result)
{
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
int err;
- /* tc qdisc add dev veth_src_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
- /* tc filter add dev veth_src_fwd ingress bpf da src_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
- /* tc filter add dev veth_src_fwd egress bpf da chk_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress bpf da src_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+ /* tc filter add dev src_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
- /* tc qdisc add dev veth_dst_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
- /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
- /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da dst_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+ /* tc filter add dev dst_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
return 0;
fail:
@@ -539,10 +600,10 @@ done:
static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
const struct netns_setup_result *setup_result)
{
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
struct nstoken *nstoken;
int err;
@@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return -1;
- /* tc qdisc add dev veth_src clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
- /* tc filter add dev veth_src ingress bpf da ingress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
- /* tc filter add dev veth_src egress bpf da egress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ /* tc qdisc add dev src clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+ /* tc filter add dev src ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev src egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken);
/* setup ns_dst tc progs */
nstoken = open_netns(NS_DST);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
return -1;
- /* tc qdisc add dev veth_dst clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
- /* tc filter add dev veth_dst ingress bpf da ingress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
- /* tc filter add dev veth_dst egress bpf da egress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ /* tc qdisc add dev dst clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
+ /* tc filter add dev dst ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev dst egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken);
/* setup ns_fwd tc progs */
nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
return -1;
- /* tc qdisc add dev veth_dst_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
- /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio100, 100);
- /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+ /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio101, 101);
- /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio100, 100);
- /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio101, 101);
- /* tc qdisc add dev veth_src_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
- /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio100, 100);
- /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+ /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio101, 101);
- /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio100, 100);
- /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio101, 101);
close_netns(nstoken);
return 0;
@@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
return;
- skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_dtime__load(skel);
if (!ASSERT_OK(err, "test_tc_dtime__load"))
@@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
goto done;
- skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_neigh__load(skel);
if (!ASSERT_OK(err, "test_tc_neigh__load"))
@@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
goto done;
- skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_peer__load(skel);
if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd)
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
{
LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
struct test_tc_peer *skel = NULL;
struct nstoken *nstoken = NULL;
int err;
@@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
goto fail;
skel->rodata->IFINDEX_SRC = ifindex;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_peer__load(skel);
if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
* towards dst, and "tc_dst" to redirect packets
- * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+ * and "tc_chk" on dst_fwd to drop non-redirected packets.
*/
/* tc qdisc add dev tun_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
- /* tc qdisc add dev veth_dst_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
- /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
- /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+ /* tc filter add dev dst_fwd egress bpf da tc_chk */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
/* Setup route and neigh tables */
SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
@@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
- SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
" dev tun_src scope global");
- SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
- SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
" dev tun_src scope global");
- SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
- SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
goto fail;
@@ -1106,9 +1167,9 @@ fail:
close_netns(nstoken);
}
-#define RUN_TEST(name) \
+#define RUN_TEST(name, mode) \
({ \
- struct netns_setup_result setup_result; \
+ struct netns_setup_result setup_result = { .dev_mode = mode, }; \
if (test__start_subtest(#name)) \
if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
@@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg)
{
netns_setup_namespaces_nofail("delete");
- RUN_TEST(tc_redirect_peer);
- RUN_TEST(tc_redirect_peer_l3);
- RUN_TEST(tc_redirect_neigh);
- RUN_TEST(tc_redirect_neigh_fib);
- RUN_TEST(tc_redirect_dtime);
+ RUN_TEST(tc_redirect_peer, MODE_VETH);
+ RUN_TEST(tc_redirect_peer, MODE_NETKIT);
+ RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+ RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
+ RUN_TEST(tc_redirect_neigh, MODE_VETH);
+ RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
+ RUN_TEST(tc_redirect_dtime, MODE_VETH);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e5c61aa6604a..5cfa7a6316b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -31,6 +31,7 @@
#include "verifier_helper_restricted.skel.h"
#include "verifier_helper_value_access.skel.h"
#include "verifier_int_ptr.skel.h"
+#include "verifier_iterating_callbacks.skel.h"
#include "verifier_jeq_infer_not_null.skel.h"
#include "verifier_ld_ind.skel.h"
#include "verifier_ldsx.skel.h"
@@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces
void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
+void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
index 4ce76eb064c4..d461746fd3c1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data)
return 0;
}
+static int outer_loop(__u32 index, void *data)
+{
+ bpf_loop(nr_loops, empty_callback, NULL, 0);
+ __sync_add_and_fetch(&hits, nr_loops);
+ return 0;
+}
+
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int benchmark(void *ctx)
{
- for (int i = 0; i < 1000; i++) {
- bpf_loop(nr_loops, empty_callback, NULL, 0);
-
- __sync_add_and_fetch(&hits, nr_loops);
- }
+ bpf_loop(1000, outer_loop, NULL, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 76d661b20e87..56c764df8196 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -33,6 +33,7 @@ int underflow_prog(void *ctx)
if (!p)
return 0;
bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+ bpf_kfunc_call_test_release(p);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
index 4c39e920dac2..8c0ef2742208 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_fail.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx)
return 0;
bpf_spin_lock(&lock);
bpf_rbtree_add(&rbtree, &f->node, rbless);
+ bpf_spin_unlock(&lock);
return 0;
}
@@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx)
if (!f)
return 0;
bpf_loop(5, subprog_cb_ref, NULL, 0);
+ bpf_obj_drop(f);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index e02cfd380746..40df2cc26eaf 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -24,9 +24,11 @@ struct task_struct {};
#define STACK_TABLE_EPOCH_SHIFT 20
#define STROBE_MAX_STR_LEN 1
#define STROBE_MAX_CFGS 32
+#define READ_MAP_VAR_PAYLOAD_CAP \
+ ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
#define STROBE_MAX_PAYLOAD \
(STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
- STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+ STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
struct strobe_value_header {
/*
@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base,
struct strobe_value_generic *value,
struct strobemeta_payload *data,
- void *payload)
+ size_t off)
{
void *location;
uint64_t len;
@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
/*
* if bpf_probe_read_user_str returns error (<0), due to casting to
* unsinged int, it will become big number, so next check is
@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
data->str_lens[idx] = len;
- return len;
+ return off + len;
}
-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data,
- void *payload)
+static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data,
+ size_t off)
{
struct strobe_map_descr* descr = &data->map_descrs[idx];
struct strobe_map_raw map;
@@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
location = calc_location(&cfg->map_locs[idx], tls_base);
if (!location)
- return payload;
+ return off;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
- return payload;
+ return off;
descr->id = map.id;
descr->cnt = map.cnt;
@@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1;
}
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len;
- payload += len;
+ off += len;
}
#ifdef NO_UNROLL
@@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
break;
descr->key_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len;
- payload += len;
+ off += len;
}
descr->val_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len;
- payload += len;
+ off += len;
}
}
- return payload;
+ return off;
}
#ifdef USE_BPF_LOOP
@@ -455,14 +457,20 @@ struct read_var_ctx {
struct strobemeta_payload *data;
void *tls_base;
struct strobemeta_cfg *cfg;
- void *payload;
+ size_t payload_off;
/* value gets mutated */
struct strobe_value_generic *value;
enum read_type type;
};
-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
{
+ /* lose precision info for ctx->payload_off, verifier won't track
+ * double xor, barrier_var() is needed to force clang keep both xors.
+ */
+ ctx->payload_off ^= index;
+ barrier_var(ctx->payload_off);
+ ctx->payload_off ^= index;
switch (ctx->type) {
case READ_INT_VAR:
if (index >= STROBE_MAX_INTS)
@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
case READ_MAP_VAR:
if (index >= STROBE_MAX_MAPS)
return 1;
- ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
- ctx->value, ctx->data, ctx->payload);
+ if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
+ return 1;
+ ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
break;
case READ_STR_VAR:
if (index >= STROBE_MAX_STRS)
return 1;
- ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
- ctx->value, ctx->data, ctx->payload);
+ if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
+ return 1;
+ ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
break;
}
return 0;
@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task,
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
struct strobemeta_cfg *cfg;
- void *tls_base, *payload;
+ size_t payload_off;
+ void *tls_base;
cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
if (!cfg)
@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task,
data->int_vals_set_mask = 0;
data->req_meta_valid = 0;
- payload = data->payload;
+ payload_off = 0;
/*
* we don't have struct task_struct definition, it should be:
* tls_base = (void *)task->thread.fsbase;
@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task,
.tls_base = tls_base,
.value = &value,
.data = data,
- .payload = payload,
+ .payload_off = 0,
};
int err;
@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task,
err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
if (err != STROBE_MAX_MAPS)
return NULL;
+
+ payload_off = ctx.payload_off;
+ /* this should not really happen, here only to satisfy verifer */
+ if (payload_off > sizeof(data->payload))
+ payload_off = sizeof(data->payload);
#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
- payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
}
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
- payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
}
#endif /* USE_BPF_LOOP */
@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task,
* return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent
*/
- return payload;
+ return &data->payload[payload_off];
}
SEC("raw_tracepoint/kfree_skb")
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
new file mode 100644
index 000000000000..5905e036e0ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 8);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+ __uint(max_entries, 8);
+} ringbuf SEC(".maps");
+
+struct vm_area_struct;
+struct bpf_map;
+
+struct buf_context {
+ char *buf;
+};
+
+struct num_context {
+ __u64 i;
+ __u64 j;
+};
+
+__u8 choice_arr[2] = { 0, 1 };
+
+static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
+{
+ if (idx == 0) {
+ ctx->buf = (char *)(0xDEAD);
+ return 0;
+ }
+
+ if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+ return 1;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=fp")
+int unsafe_on_2nd_iter(void *unused)
+{
+ char buf[4];
+ struct buf_context loop_ctx = { .buf = buf };
+
+ bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i = 0;
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_on_zero_iter(void *unused)
+{
+ struct num_context loop_ctx = { .i = 32 };
+
+ bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int widening_cb(__u32 idx, struct num_context *ctx)
+{
+ ++ctx->i;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int widening(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0, .j = 1 };
+
+ bpf_loop(100, widening_cb, &loop_ctx, 0);
+ /* loop_ctx.j is not changed during callback iteration,
+ * verifier should not apply widening to it.
+ */
+ return choice_arr[loop_ctx.j];
+}
+
+static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+{
+ for (;;) {}
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("infinite loop detected")
+int loop_detection(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static __always_inline __u64 oob_state_machine(struct num_context *ctx)
+{
+ switch (ctx->i) {
+ case 0:
+ ctx->i = 1;
+ break;
+ case 1:
+ ctx->i = 32;
+ break;
+ }
+ return 0;
+}
+
+static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_for_each_map_elem(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_ringbuf_drain(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_find_vma(void *unused)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int iter_limit_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i++;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int bpf_loop_iter_limit_ok(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(1, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
+int bpf_loop_iter_limit_overflow(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(2, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 100;
+ return 0;
+}
+
+static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 10;
+ return 0;
+}
+
+static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 1;
+ bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
+ bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
+ return 0;
+}
+
+/* Check that path visiting every callback function once had been
+ * reached by verifier. Variables 'ctx{1,2}i' below serve as flags,
+ * with each decimal digit corresponding to a callback visit marker.
+ */
+SEC("socket")
+__success __retval(111111)
+int bpf_loop_iter_limit_nested(void *unused)
+{
+ struct num_context ctx1 = { .i = 0 };
+ struct num_context ctx2 = { .i = 0 };
+ __u64 a, b, c;
+
+ bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+ bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
+ a = ctx1.i;
+ b = ctx2.i;
+ /* Force 'ctx1.i' and 'ctx2.i' precise. */
+ c = choice_arr[(a + b) % 2];
+ /* This makes 'c' zero, but neither clang nor verifier know it. */
+ c /= 10;
+ /* Make sure that verifier does not visit 'impossible' states:
+ * enumerate all possible callback visit masks.
+ */
+ if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
+ b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
+ asm volatile ("r0 /= 0;" ::: "r0");
+ return 1000 * a + b + c;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index db6b3143338b..f61d623b1ce8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void)
SEC("?raw_tp")
__success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
__msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
__msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
-__msg("mark_precise: frame0: parent state regs=r0 stack=:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
__naked int callback_result_precise(void)
{
asm volatile (
@@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void)
SEC("?raw_tp")
__success __log_level(2)
+/* First simulated path does not include callback body */
__msg("12: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
__msg("mark_precise: frame0: parent state regs=r6 stack=:")
-__msg("mark_precise: frame0: last_idx 16 first_idx 0")
-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
__naked int parent_callee_saved_reg_precise_with_callback(void)
{
asm volatile (
@@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void)
SEC("?raw_tp")
__success __log_level(2)
+/* First simulated path does not include callback body */
__msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
__msg("mark_precise: frame0: parent state regs= stack=-8:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
__naked int parent_stack_slot_precise_with_callback(void)
{
asm volatile (
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index e959336c7a73..80f620602d50 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -53,6 +53,8 @@
#define DEFAULT_TTL 64
#define MAX_ALLOWED_PORTS 8
+#define MAX_PACKET_OFF 0xffff
+
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
@@ -183,63 +185,76 @@ static __always_inline __u32 tcp_clock_ms(void)
}
struct tcpopt_context {
- __u8 *ptr;
- __u8 *end;
+ void *data;
void *data_end;
__be32 *tsecr;
__u8 wscale;
bool option_timestamp;
bool option_sack;
+ __u32 off;
};
-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
{
- __u8 opcode, opsize;
+ __u64 off = ctx->off;
+ __u8 *data;
- if (ctx->ptr >= ctx->end)
- return 1;
- if (ctx->ptr >= ctx->data_end)
- return 1;
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
- opcode = ctx->ptr[0];
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
- if (opcode == TCPOPT_EOL)
- return 1;
- if (opcode == TCPOPT_NOP) {
- ++ctx->ptr;
- return 0;
- }
+ ctx->off += sz;
+ return data;
+}
- if (ctx->ptr + 1 >= ctx->end)
- return 1;
- if (ctx->ptr + 1 >= ctx->data_end)
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+ __u8 *opcode, *opsize, *wscale, *tsecr;
+ __u32 off = ctx->off;
+
+ opcode = next(ctx, 1);
+ if (!opcode)
return 1;
- opsize = ctx->ptr[1];
- if (opsize < 2)
+
+ if (*opcode == TCPOPT_EOL)
return 1;
+ if (*opcode == TCPOPT_NOP)
+ return 0;
- if (ctx->ptr + opsize > ctx->end)
+ opsize = next(ctx, 1);
+ if (!opsize || *opsize < 2)
return 1;
- switch (opcode) {
+ switch (*opcode) {
case TCPOPT_WINDOW:
- if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
- ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+ wscale = next(ctx, 1);
+ if (!wscale)
+ return 1;
+ if (*opsize == TCPOLEN_WINDOW)
+ ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
break;
case TCPOPT_TIMESTAMP:
- if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+ tsecr = next(ctx, 4);
+ if (!tsecr)
+ return 1;
+ if (*opsize == TCPOLEN_TIMESTAMP) {
ctx->option_timestamp = true;
/* Client's tsval becomes our tsecr. */
- *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+ *ctx->tsecr = get_unaligned((__be32 *)tsecr);
}
break;
case TCPOPT_SACK_PERM:
- if (opsize == TCPOLEN_SACK_PERM)
+ if (*opsize == TCPOLEN_SACK_PERM)
ctx->option_sack = true;
break;
}
- ctx->ptr += opsize;
+ ctx->off = off + *opsize;
return 0;
}
@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
__u16 tcp_len, __be32 *tsval,
- __be32 *tsecr, void *data_end)
+ __be32 *tsecr, void *data, void *data_end)
{
struct tcpopt_context loop_ctx = {
- .ptr = (__u8 *)(tcp_header + 1),
- .end = (__u8 *)tcp_header + tcp_len,
+ .data = data,
.data_end = data_end,
.tsecr = tsecr,
.wscale = TS_OPT_WSCALE_MASK,
.option_timestamp = false,
.option_sack = false,
+ /* Note: currently verifier would track .off as unbound scalar.
+ * In case if verifier would at some point get smarter and
+ * compute bounded value for this var, beware that it might
+ * hinder bpf_loop() convergence validation.
+ */
+ .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
};
u32 cookie;
@@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
cookie = (__u32)value;
if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
- &tsopt_buf[0], &tsopt_buf[1], data_end))
+ &tsopt_buf[0], &tsopt_buf[1], data, data_end))
tsopt = tsopt_buf;
/* Check that there is enough space for a SYNACK. It also covers