From 9d2349740e430b20660457b7c88fa06467457272 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 12 Dec 2022 13:15:02 -0800
Subject: selftests/bpf: Add non-standardly sized enum tests for btf_dump

Add few custom enum definitions testing mode(byte) and mode(word)
attributes.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221212211505.558851-4-andrii@kernel.org
---
 .../bpf/progs/btf_dump_test_case_syntax.c          | 36 ++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 4ee4748133fe..26fffb02ed10 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -25,6 +25,39 @@ typedef enum {
 	H = 2,
 } e3_t;
 
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *enum e_byte {
+ *	EBYTE_1 = 0,
+ *	EBYTE_2 = 1,
+ *} __attribute__((mode(byte)));
+ *
+ */
+/* ----- END-EXPECTED-OUTPUT ----- */
+enum e_byte {
+	EBYTE_1,
+	EBYTE_2,
+} __attribute__((mode(byte)));
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *enum e_word {
+ *	EWORD_1 = 0LL,
+ *	EWORD_2 = 1LL,
+ *} __attribute__((mode(word)));
+ *
+ */
+/* ----- END-EXPECTED-OUTPUT ----- */
+enum e_word {
+	EWORD_1,
+	EWORD_2,
+} __attribute__((mode(word))); /* force to use 8-byte backing for this enum */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+enum e_big {
+	EBIG_1 = 1000000000000ULL,
+};
+
 typedef int int_t;
 
 typedef volatile const int * volatile const crazy_ptr_t;
@@ -224,6 +257,9 @@ struct root_struct {
 	enum e2 _2;
 	e2_t _2_1;
 	e3_t _2_2;
+	enum e_byte _100;
+	enum e_word _101;
+	enum e_big _102;
 	struct struct_w_typedefs _3;
 	anon_struct_t _7;
 	struct struct_fwd *_8;
-- 
cgit v1.2.3


From ea2ce1ba99aa6a60c8d8a706e3abadf3de372163 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 12 Dec 2022 13:15:04 -0800
Subject: libbpf: Fix BTF-to-C converter's padding logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turns out that btf_dump API doesn't handle a bunch of tricky corner
cases, as reported by Per, and further discovered using his testing
Python script ([0]).

This patch revamps btf_dump's padding logic significantly, making it
more correct and also avoiding unnecessary explicit padding, where
compiler would pad naturally. This overall topic turned out to be very
tricky and subtle, there are lots of subtle corner cases. The comments
in the code tries to give some clues, but comments themselves are
supposed to be paired with good understanding of C alignment and padding
rules. Plus some experimentation to figure out subtle things like
whether `long :0;` means that struct is now forced to be long-aligned
(no, it's not, turns out).

Anyways, Per's script, while not completely correct in some known
situations, doesn't show any obvious cases where this logic breaks, so
this is a nice improvement over the previous state of this logic.

Some selftests had to be adjusted to accommodate better use of natural
alignment rules, eliminating some unnecessary padding, or changing it to
`type: 0;` alignment markers.

Note also that for when we are in between bitfields, we emit explicit
bit size, while otherwise we use `: 0`, this feels much more natural in
practice.

Next patch will add few more test cases, found through randomized Per's
script.

  [0] https://lore.kernel.org/bpf/85f83c333f5355c8ac026f835b18d15060725fcb.camel@ericsson.com/

Reported-by: Per Sundström XP <per.xp.sundstrom@ericsson.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221212211505.558851-6-andrii@kernel.org
---
 tools/lib/bpf/btf_dump.c                           | 169 +++++++++++++++------
 .../bpf/progs/btf_dump_test_case_bitfields.c       |   2 +-
 .../bpf/progs/btf_dump_test_case_padding.c         |  58 ++++---
 3 files changed, 164 insertions(+), 65 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 234e82334d56..d6fd93a57f11 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -830,6 +830,25 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 	}
 }
 
+static int btf_natural_align_of(const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+	int i, align, vlen;
+	const struct btf_member *m;
+
+	if (!btf_is_composite(t))
+		return btf__align_of(btf, id);
+
+	align = 1;
+	m = btf_members(t);
+	vlen = btf_vlen(t);
+	for (i = 0; i < vlen; i++, m++) {
+		align = max(align, btf__align_of(btf, m->type));
+	}
+
+	return align;
+}
+
 static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
 				 const struct btf_type *t)
 {
@@ -837,16 +856,16 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
 	int align, i, bit_sz;
 	__u16 vlen;
 
-	align = btf__align_of(btf, id);
-	/* size of a non-packed struct has to be a multiple of its alignment*/
-	if (align && t->size % align)
+	align = btf_natural_align_of(btf, id);
+	/* size of a non-packed struct has to be a multiple of its alignment */
+	if (align && (t->size % align) != 0)
 		return true;
 
 	m = btf_members(t);
 	vlen = btf_vlen(t);
 	/* all non-bitfield fields have to be naturally aligned */
 	for (i = 0; i < vlen; i++, m++) {
-		align = btf__align_of(btf, m->type);
+		align = btf_natural_align_of(btf, m->type);
 		bit_sz = btf_member_bitfield_size(t, i);
 		if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
 			return true;
@@ -859,44 +878,97 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
 	return false;
 }
 
-static int chip_away_bits(int total, int at_most)
-{
-	return total % at_most ? : at_most;
-}
-
 static void btf_dump_emit_bit_padding(const struct btf_dump *d,
-				      int cur_off, int m_off, int m_bit_sz,
-				      int align, int lvl)
+				      int cur_off, int next_off, int next_align,
+				      bool in_bitfield, int lvl)
 {
-	int off_diff = m_off - cur_off;
-	int ptr_bits = d->ptr_sz * 8;
+	const struct {
+		const char *name;
+		int bits;
+	} pads[] = {
+		{"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8}
+	};
+	int new_off, pad_bits, bits, i;
+	const char *pad_type;
+
+	if (cur_off >= next_off)
+		return; /* no gap */
+
+	/* For filling out padding we want to take advantage of
+	 * natural alignment rules to minimize unnecessary explicit
+	 * padding. First, we find the largest type (among long, int,
+	 * short, or char) that can be used to force naturally aligned
+	 * boundary. Once determined, we'll use such type to fill in
+	 * the remaining padding gap. In some cases we can rely on
+	 * compiler filling some gaps, but sometimes we need to force
+	 * alignment to close natural alignment with markers like
+	 * `long: 0` (this is always the case for bitfields).  Note
+	 * that even if struct itself has, let's say 4-byte alignment
+	 * (i.e., it only uses up to int-aligned types), using `long:
+	 * X;` explicit padding doesn't actually change struct's
+	 * overall alignment requirements, but compiler does take into
+	 * account that type's (long, in this example) natural
+	 * alignment requirements when adding implicit padding. We use
+	 * this fact heavily and don't worry about ruining correct
+	 * struct alignment requirement.
+	 */
+	for (i = 0; i < ARRAY_SIZE(pads); i++) {
+		pad_bits = pads[i].bits;
+		pad_type = pads[i].name;
 
-	if (off_diff <= 0)
-		/* no gap */
-		return;
-	if (m_bit_sz == 0 && off_diff < align * 8)
-		/* natural padding will take care of a gap */
-		return;
+		new_off = roundup(cur_off, pad_bits);
+		if (new_off <= next_off)
+			break;
+	}
 
-	while (off_diff > 0) {
-		const char *pad_type;
-		int pad_bits;
-
-		if (ptr_bits > 32 && off_diff > 32) {
-			pad_type = "long";
-			pad_bits = chip_away_bits(off_diff, ptr_bits);
-		} else if (off_diff > 16) {
-			pad_type = "int";
-			pad_bits = chip_away_bits(off_diff, 32);
-		} else if (off_diff > 8) {
-			pad_type = "short";
-			pad_bits = chip_away_bits(off_diff, 16);
-		} else {
-			pad_type = "char";
-			pad_bits = chip_away_bits(off_diff, 8);
+	if (new_off > cur_off && new_off <= next_off) {
+		/* We need explicit `<type>: 0` aligning mark if next
+		 * field is right on alignment offset and its
+		 * alignment requirement is less strict than <type>'s
+		 * alignment (so compiler won't naturally align to the
+		 * offset we expect), or if subsequent `<type>: X`,
+		 * will actually completely fit in the remaining hole,
+		 * making compiler basically ignore `<type>: X`
+		 * completely.
+		 */
+		if (in_bitfield ||
+		    (new_off == next_off && roundup(cur_off, next_align * 8) != new_off) ||
+		    (new_off != next_off && next_off - new_off <= new_off - cur_off))
+			/* but for bitfields we'll emit explicit bit count */
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type,
+					in_bitfield ? new_off - cur_off : 0);
+		cur_off = new_off;
+	}
+
+	/* Now we know we start at naturally aligned offset for a chosen
+	 * padding type (long, int, short, or char), and so the rest is just
+	 * a straightforward filling of remaining padding gap with full
+	 * `<type>: sizeof(<type>);` markers, except for the last one, which
+	 * might need smaller than sizeof(<type>) padding.
+	 */
+	while (cur_off != next_off) {
+		bits = min(next_off - cur_off, pad_bits);
+		if (bits == pad_bits) {
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+			cur_off += bits;
+			continue;
+		}
+		/* For the remainder padding that doesn't cover entire
+		 * pad_type bit length, we pick the smallest necessary type.
+		 * This is pure aesthetics, we could have just used `long`,
+		 * but having smallest necessary one communicates better the
+		 * scale of the padding gap.
+		 */
+		for (i = ARRAY_SIZE(pads) - 1; i >= 0; i--) {
+			pad_type = pads[i].name;
+			pad_bits = pads[i].bits;
+			if (pad_bits < bits)
+				continue;
+
+			btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, bits);
+			cur_off += bits;
+			break;
 		}
-		btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
-		off_diff -= pad_bits;
 	}
 }
 
@@ -916,9 +988,11 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
 {
 	const struct btf_member *m = btf_members(t);
 	bool is_struct = btf_is_struct(t);
-	int align, i, packed, off = 0;
+	bool packed, prev_bitfield = false;
+	int align, i, off = 0;
 	__u16 vlen = btf_vlen(t);
 
+	align = btf__align_of(d->btf, id);
 	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
 
 	btf_dump_printf(d, "%s%s%s {",
@@ -928,33 +1002,36 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
 
 	for (i = 0; i < vlen; i++, m++) {
 		const char *fname;
-		int m_off, m_sz;
+		int m_off, m_sz, m_align;
+		bool in_bitfield;
 
 		fname = btf_name_of(d, m->name_off);
 		m_sz = btf_member_bitfield_size(t, i);
 		m_off = btf_member_bit_offset(t, i);
-		align = packed ? 1 : btf__align_of(d->btf, m->type);
+		m_align = packed ? 1 : btf__align_of(d->btf, m->type);
+
+		in_bitfield = prev_bitfield && m_sz != 0;
 
-		btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
+		btf_dump_emit_bit_padding(d, off, m_off, m_align, in_bitfield, lvl + 1);
 		btf_dump_printf(d, "\n%s", pfx(lvl + 1));
 		btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
 
 		if (m_sz) {
 			btf_dump_printf(d, ": %d", m_sz);
 			off = m_off + m_sz;
+			prev_bitfield = true;
 		} else {
 			m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
 			off = m_off + m_sz * 8;
+			prev_bitfield = false;
 		}
+
 		btf_dump_printf(d, ";");
 	}
 
 	/* pad at the end, if necessary */
-	if (is_struct) {
-		align = packed ? 1 : btf__align_of(d->btf, id);
-		btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
-					  lvl + 1);
-	}
+	if (is_struct)
+		btf_dump_emit_bit_padding(d, off, t->size * 8, align, false, lvl + 1);
 
 	/*
 	 * Keep `struct empty {}` on a single line,
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
index e5560a656030..e01690618e1e 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
@@ -53,7 +53,7 @@ struct bitfields_only_mixed_types {
  */
 /* ------ END-EXPECTED-OUTPUT ------ */
 struct bitfield_mixed_with_others {
-	long: 4; /* char is enough as a backing field */
+	char: 4; /* char is enough as a backing field */
 	int a: 4;
 	/* 8-bit implicit padding */
 	short b; /* combined with previous bitfield */
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
index 7cb522d22a66..6f963d34c45b 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -19,7 +19,7 @@ struct padded_implicitly {
 /*
  *struct padded_explicitly {
  *	int a;
- *	int: 32;
+ *	long: 0;
  *	int b;
  *};
  *
@@ -28,41 +28,28 @@ struct padded_implicitly {
 
 struct padded_explicitly {
 	int a;
-	int: 1; /* algo will explicitly pad with full 32 bits here */
+	int: 1; /* algo will emit aligning `long: 0;` here */
 	int b;
 };
 
 /* ----- START-EXPECTED-OUTPUT ----- */
-/*
- *struct padded_a_lot {
- *	int a;
- *	long: 32;
- *	long: 64;
- *	long: 64;
- *	int b;
- *};
- *
- */
-/* ------ END-EXPECTED-OUTPUT ------ */
-
 struct padded_a_lot {
 	int a;
-	/* 32 bit of implicit padding here, which algo will make explicit */
 	long: 64;
 	long: 64;
 	int b;
 };
 
+/* ------ END-EXPECTED-OUTPUT ------ */
+
 /* ----- START-EXPECTED-OUTPUT ----- */
 /*
  *struct padded_cache_line {
  *	int a;
- *	long: 32;
  *	long: 64;
  *	long: 64;
  *	long: 64;
  *	int b;
- *	long: 32;
  *	long: 64;
  *	long: 64;
  *	long: 64;
@@ -85,7 +72,7 @@ struct padded_cache_line {
  *struct zone {
  *	int a;
  *	short b;
- *	short: 16;
+ *	long: 0;
  *	struct zone_padding __pad__;
  *};
  *
@@ -108,6 +95,39 @@ struct padding_wo_named_members {
 	long: 64;
 };
 
+struct padding_weird_1 {
+	int a;
+	long: 64;
+	short: 16;
+	short b;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padding_weird_2 {
+ *	long: 56;
+ *	char a;
+ *	long: 56;
+ *	char b;
+ *	char: 8;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+struct padding_weird_2 {
+	int: 32;	/* these paddings will be collapsed into `long: 56;` */
+	short: 16;
+	char: 8;
+	char a;
+	int: 32;	/* these paddings will be collapsed into `long: 56;` */
+	short: 16;
+	char: 8;
+	char b;
+	char: 8;
+};
+
 /* ------ END-EXPECTED-OUTPUT ------ */
 
 int f(struct {
@@ -117,6 +137,8 @@ int f(struct {
 	struct padded_cache_line _4;
 	struct zone _5;
 	struct padding_wo_named_members _6;
+	struct padding_weird_1 _7;
+	struct padding_weird_2 _8;
 } *_)
 {
 	return 0;
-- 
cgit v1.2.3


From b148c8b9b926e257a59c8eb2cd6fa3adfd443254 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 12 Dec 2022 13:15:05 -0800
Subject: selftests/bpf: Add few corner cases to test padding handling of
 btf_dump

Add few hand-crafted cases and few randomized cases found using script
from [0] that tests btf_dump's padding logic.

  [0] https://lore.kernel.org/bpf/85f83c333f5355c8ac026f835b18d15060725fcb.camel@ericsson.com/

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20221212211505.558851-7-andrii@kernel.org
---
 .../bpf/progs/btf_dump_test_case_packing.c         |  61 +++++++++++-
 .../bpf/progs/btf_dump_test_case_padding.c         | 104 +++++++++++++++++++++
 2 files changed, 164 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
index e304b6204bd9..5c6c62f7ed32 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -58,7 +58,64 @@ union jump_code_union {
 	} __attribute__((packed));
 };
 
-/*------ END-EXPECTED-OUTPUT ------ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct nested_packed_but_aligned_struct {
+ *	int x1;
+ *	int x2;
+ *};
+ *
+ *struct outer_implicitly_packed_struct {
+ *	char y1;
+ *	struct nested_packed_but_aligned_struct y2;
+ *} __attribute__((packed));
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct nested_packed_but_aligned_struct {
+	int x1;
+	int x2;
+} __attribute__((packed));
+
+struct outer_implicitly_packed_struct {
+	char y1;
+	struct nested_packed_but_aligned_struct y2;
+};
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct usb_ss_ep_comp_descriptor {
+ *	char: 8;
+ *	char bDescriptorType;
+ *	char bMaxBurst;
+ *	short wBytesPerInterval;
+ *};
+ *
+ *struct usb_host_endpoint {
+ *	long: 64;
+ *	char: 8;
+ *	struct usb_ss_ep_comp_descriptor ss_ep_comp;
+ *	long: 0;
+ *} __attribute__((packed));
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct usb_ss_ep_comp_descriptor {
+	char: 8;
+	char bDescriptorType;
+	char bMaxBurst;
+	int: 0;
+	short wBytesPerInterval;
+} __attribute__((packed));
+
+struct usb_host_endpoint {
+	long: 64;
+	char: 8;
+	struct usb_ss_ep_comp_descriptor ss_ep_comp;
+	long: 0;
+};
+
 
 int f(struct {
 	struct packed_trailing_space _1;
@@ -69,6 +126,8 @@ int f(struct {
 	union union_is_never_packed _6;
 	union union_does_not_need_packing _7;
 	union jump_code_union _8;
+	struct outer_implicitly_packed_struct _9;
+	struct usb_host_endpoint _10;
 } *_)
 {
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
index 6f963d34c45b..79276fbe454a 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -128,6 +128,98 @@ struct padding_weird_2 {
 	char: 8;
 };
 
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct exact_1byte {
+	char x;
+};
+
+struct padded_1byte {
+	char: 8;
+};
+
+struct exact_2bytes {
+	short x;
+};
+
+struct padded_2bytes {
+	short: 16;
+};
+
+struct exact_4bytes {
+	int x;
+};
+
+struct padded_4bytes {
+	int: 32;
+};
+
+struct exact_8bytes {
+	long x;
+};
+
+struct padded_8bytes {
+	long: 64;
+};
+
+struct ff_periodic_effect {
+	int: 32;
+	short magnitude;
+	long: 0;
+	short phase;
+	long: 0;
+	int: 32;
+	int custom_len;
+	short *custom_data;
+};
+
+struct ib_wc {
+	long: 64;
+	long: 64;
+	int: 32;
+	int byte_len;
+	void *qp;
+	union {} ex;
+	long: 64;
+	int slid;
+	int wc_flags;
+	long: 64;
+	char smac[6];
+	long: 0;
+	char network_hdr_type;
+};
+
+struct acpi_object_method {
+	long: 64;
+	char: 8;
+	char type;
+	short reference_count;
+	char flags;
+	short: 0;
+	char: 8;
+	char sync_level;
+	long: 64;
+	void *node;
+	void *aml_start;
+	union {} dispatch;
+	long: 64;
+	int aml_length;
+};
+
+struct nested_unpacked {
+	int x;
+};
+
+struct nested_packed {
+	struct nested_unpacked a;
+	char c;
+} __attribute__((packed));
+
+struct outer_mixed_but_unpacked {
+	struct nested_packed b1;
+	short a1;
+	struct nested_packed b2;
+};
+
 /* ------ END-EXPECTED-OUTPUT ------ */
 
 int f(struct {
@@ -139,6 +231,18 @@ int f(struct {
 	struct padding_wo_named_members _6;
 	struct padding_weird_1 _7;
 	struct padding_weird_2 _8;
+	struct exact_1byte _100;
+	struct padded_1byte _101;
+	struct exact_2bytes _102;
+	struct padded_2bytes _103;
+	struct exact_4bytes _104;
+	struct padded_4bytes _105;
+	struct exact_8bytes _106;
+	struct padded_8bytes _107;
+	struct ff_periodic_effect _200;
+	struct ib_wc _201;
+	struct acpi_object_method _202;
+	struct outer_mixed_but_unpacked _203;
 } *_)
 {
 	return 0;
-- 
cgit v1.2.3


From 4fb877aaa179dcdb1676d55216482febaada457e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 15 Dec 2022 10:36:05 -0800
Subject: libbpf: Fix btf_dump's packed struct determination

Fix bug in btf_dump's logic of determining if a given struct type is
packed or not. The notion of "natural alignment" is not needed and is
even harmful in this case, so drop it altogether. The biggest difference
in btf_is_struct_packed() compared to its original implementation is
that we don't really use btf__align_of() to determine overall alignment
of a struct type (because it could be 1 for both packed and non-packed
struct, depending on specifci field definitions), and just use field's
actual alignment to calculate whether any field is requiring packing or
struct's size overall necessitates packing.

Add two simple test cases that demonstrate the difference this change
would make.

Fixes: ea2ce1ba99aa ("libbpf: Fix BTF-to-C converter's padding logic")
Reported-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20221215183605.4149488-1-andrii@kernel.org
---
 tools/lib/bpf/btf_dump.c                           | 33 ++++------------------
 .../bpf/progs/btf_dump_test_case_packing.c         | 19 +++++++++++++
 2 files changed, 25 insertions(+), 27 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index d6fd93a57f11..580985ee5545 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -830,47 +830,26 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 	}
 }
 
-static int btf_natural_align_of(const struct btf *btf, __u32 id)
-{
-	const struct btf_type *t = btf__type_by_id(btf, id);
-	int i, align, vlen;
-	const struct btf_member *m;
-
-	if (!btf_is_composite(t))
-		return btf__align_of(btf, id);
-
-	align = 1;
-	m = btf_members(t);
-	vlen = btf_vlen(t);
-	for (i = 0; i < vlen; i++, m++) {
-		align = max(align, btf__align_of(btf, m->type));
-	}
-
-	return align;
-}
-
 static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
 				 const struct btf_type *t)
 {
 	const struct btf_member *m;
-	int align, i, bit_sz;
+	int max_align = 1, align, i, bit_sz;
 	__u16 vlen;
 
-	align = btf_natural_align_of(btf, id);
-	/* size of a non-packed struct has to be a multiple of its alignment */
-	if (align && (t->size % align) != 0)
-		return true;
-
 	m = btf_members(t);
 	vlen = btf_vlen(t);
 	/* all non-bitfield fields have to be naturally aligned */
 	for (i = 0; i < vlen; i++, m++) {
-		align = btf_natural_align_of(btf, m->type);
+		align = btf__align_of(btf, m->type);
 		bit_sz = btf_member_bitfield_size(t, i);
 		if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
 			return true;
+		max_align = max(align, max_align);
 	}
-
+	/* size of a non-packed struct has to be a multiple of its alignment */
+	if (t->size % max_align != 0)
+		return true;
 	/*
 	 * if original struct was marked as packed, but its layout is
 	 * naturally aligned, we'll detect that it's not packed
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
index 5c6c62f7ed32..7998f27df7dd 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -116,6 +116,23 @@ struct usb_host_endpoint {
 	long: 0;
 };
 
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct nested_packed_struct {
+	int a;
+	char b;
+} __attribute__((packed));
+
+struct outer_nonpacked_struct {
+	short a;
+	struct nested_packed_struct b;
+};
+
+struct outer_packed_struct {
+	short a;
+	struct nested_packed_struct b;
+} __attribute__((packed));
+
+/* ------ END-EXPECTED-OUTPUT ------ */
 
 int f(struct {
 	struct packed_trailing_space _1;
@@ -128,6 +145,8 @@ int f(struct {
 	union jump_code_union _8;
 	struct outer_implicitly_packed_struct _9;
 	struct usb_host_endpoint _10;
+	struct outer_nonpacked_struct _11;
+	struct outer_packed_struct _12;
 } *_)
 {
 	return 0;
-- 
cgit v1.2.3


From ac6e45e05857464a1e347c50da9917141f1fbb80 Mon Sep 17 00:00:00 2001
From: Christian Ehrig <cehrig@cloudflare.com>
Date: Sun, 18 Dec 2022 06:17:32 +0100
Subject: selftests/bpf: Add BPF_F_NO_TUNNEL_KEY test

This patch adds a selftest simulating a GRE sender and receiver using
tunnel headers without tunnel keys. It validates if packets encapsulated
using BPF_F_NO_TUNNEL_KEY are decapsulated by a GRE receiver not
configured with tunnel keys.

Signed-off-by: Christian Ehrig <cehrig@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20221218051734.31411-2-cehrig@cloudflare.com
---
 .../testing/selftests/bpf/progs/test_tunnel_kern.c | 21 ++++++++++++
 tools/testing/selftests/bpf/test_tunnel.sh         | 40 ++++++++++++++++++++--
 2 files changed, 58 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 98af55f0bcd3..508da4a23c4f 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -81,6 +81,27 @@ int gre_set_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+SEC("tc")
+int gre_set_tunnel_no_key(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER |
+				     BPF_F_NO_TUNNEL_KEY);
+	if (ret < 0) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
 SEC("tc")
 int gre_get_tunnel(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 2eaedc1d9ed3..06857b689c11 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -66,15 +66,20 @@ config_device()
 
 add_gre_tunnel()
 {
+	tun_key=
+	if [ -n "$1" ]; then
+		tun_key="key $1"
+	fi
+
 	# at_ns0 namespace
 	ip netns exec at_ns0 \
-        ip link add dev $DEV_NS type $TYPE seq key 2 \
+        ip link add dev $DEV_NS type $TYPE seq $tun_key \
 		local 172.16.1.100 remote 172.16.1.200
 	ip netns exec at_ns0 ip link set dev $DEV_NS up
 	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
 
 	# root namespace
-	ip link add dev $DEV type $TYPE key 2 external
+	ip link add dev $DEV type $TYPE $tun_key external
 	ip link set dev $DEV up
 	ip addr add dev $DEV 10.1.1.200/24
 }
@@ -238,7 +243,7 @@ test_gre()
 
 	check $TYPE
 	config_device
-	add_gre_tunnel
+	add_gre_tunnel 2
 	attach_bpf $DEV gre_set_tunnel gre_get_tunnel
 	ping $PING_ARG 10.1.1.100
 	check_err $?
@@ -253,6 +258,30 @@ test_gre()
         echo -e ${GREEN}"PASS: $TYPE"${NC}
 }
 
+test_gre_no_tunnel_key()
+{
+	TYPE=gre
+	DEV_NS=gre00
+	DEV=gre11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_gre_tunnel
+	attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
 test_ip6gre()
 {
 	TYPE=ip6gre
@@ -589,6 +618,7 @@ cleanup()
 	ip link del ipip6tnl11 2> /dev/null
 	ip link del ip6ip6tnl11 2> /dev/null
 	ip link del gretap11 2> /dev/null
+	ip link del gre11 2> /dev/null
 	ip link del ip6gre11 2> /dev/null
 	ip link del ip6gretap11 2> /dev/null
 	ip link del geneve11 2> /dev/null
@@ -641,6 +671,10 @@ bpf_tunnel_test()
 	test_gre
 	errors=$(( $errors + $? ))
 
+	echo "Testing GRE tunnel (without tunnel keys)..."
+	test_gre_no_tunnel_key
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6GRE tunnel..."
 	test_ip6gre
 	errors=$(( $errors + $? ))
-- 
cgit v1.2.3


From e7f0d5cdd023d8fa53d9ca541b9a55f0eb45618c Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Sun, 18 Dec 2022 06:35:09 +0800
Subject: bpf: makefiles: Do not generate empty vmlinux.h

Remove the empty vmlinux.h if bpftool failed to dump btf info.
The empty vmlinux.h can hide real error when reading output
of make.

This is done by adding .DELETE_ON_ERROR special target in related
makefiles.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20221217223509.88254-3-changbin.du@gmail.com
---
 tools/bpf/bpftool/Makefile           | 3 +++
 tools/testing/selftests/bpf/Makefile | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 787b857d3fb5..313fd1b09189 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -289,3 +289,6 @@ FORCE:
 .PHONY: all FORCE bootstrap clean install-bin install uninstall
 .PHONY: doc doc-clean doc-install doc-uninstall
 .DEFAULT_GOAL := all
+
+# Delete partially updated (corrupted) files on error
+.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c22c43bbee19..205e8c3c346a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -626,3 +626,6 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)	\
 			       liburandom_read.so)
 
 .PHONY: docs docs-clean
+
+# Delete partially updated (corrupted) files on error
+.DELETE_ON_ERROR:
-- 
cgit v1.2.3


From 59fe41b5255f7b8a19a3347ec4b03fd830d6f4aa Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 16 Dec 2022 13:43:19 -0800
Subject: selftests/bpf: Add verifier test exercising jit PROBE_MEM logic

This patch adds a test exercising logic that was fixed / improved in
the previous patch in the series, as well as general sanity checking for
jit's PROBE_MEM logic which should've been unaffected by the previous
patch.

The added verifier test does the following:

  * Acquire a referenced kptr to struct prog_test_ref_kfunc using
    existing net/bpf/test_run.c kfunc
    * Helper returns ptr to a specific prog_test_ref_kfunc whose first
      two fields - both ints - have been prepopulated w/ vals 42 and
      108, respectively
  * kptr_xchg the acquired ptr into an arraymap
  * Do a direct map_value load of the just-added ptr
    * Goal of all this setup is to get an unreferenced kptr pointing to
      struct with ints of known value, which is the result of this step
  * Using unreferenced kptr obtained in previous step, do loads of
    prog_test_ref_kfunc.a (offset 0) and .b (offset 4)
  * Then incr the kptr by 8 and load prog_test_ref_kfunc.a again (this
    time at offset -8)
  * Add all the loaded ints together and return

Before the PROBE_MEM fixes in previous patch, the loads at offset 0 and
4 would succeed, while the load at offset -8 would incorrectly fail
runtime check emitted by the JIT and 0 out dst reg as a result. This
confirmed by retval of 150 for this test before previous patch - since
second .a read is 0'd out - and a retval of 192 with the fixed logic.

The test exercises the two optimizations to fixed logic added in last
patch as well:

  * First load, with insn "r8 = *(u32 *)(r9 + 0)" exercises "insn->off
    is 0, no need to add / sub from src_reg" optimization
  * Third load, with insn "r9 = *(u32 *)(r9 - 8)" exercises "src_reg ==
    dst_reg, no need to restore src_reg after load" optimization

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221216214319.3408356-2-davemarchevsky@fb.com
---
 .../selftests/bpf/prog_tests/jit_probe_mem.c       | 28 ++++++++++
 tools/testing/selftests/bpf/progs/jit_probe_mem.c  | 61 ++++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c
 create mode 100644 tools/testing/selftests/bpf/progs/jit_probe_mem.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c
new file mode 100644
index 000000000000..5639428607e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "jit_probe_mem.skel.h"
+
+void test_jit_probe_mem(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
+	struct jit_probe_mem *skel;
+	int ret;
+
+	skel = jit_probe_mem__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "jit_probe_mem__open_and_load"))
+		return;
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_jit_probe_mem), &opts);
+	ASSERT_OK(ret, "jit_probe_mem ret");
+	ASSERT_OK(opts.retval, "jit_probe_mem opts.retval");
+	ASSERT_EQ(skel->data->total_sum, 192, "jit_probe_mem total_sum");
+
+	jit_probe_mem__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
new file mode 100644
index 000000000000..2d2e61470794
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+static struct prog_test_ref_kfunc __kptr_ref *v;
+long total_sum = -1;
+
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+
+SEC("tc")
+int test_jit_probe_mem(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	unsigned long zero = 0, sum;
+
+	p = bpf_kfunc_call_test_acquire(&zero);
+	if (!p)
+		return 1;
+
+	p = bpf_kptr_xchg(&v, p);
+	if (p)
+		goto release_out;
+
+	/* Direct map value access of kptr, should be PTR_UNTRUSTED */
+	p = v;
+	if (!p)
+		return 1;
+
+	asm volatile (
+		"r9 = %[p];"
+		"%[sum] = 0;"
+
+		/* r8 = p->a */
+		"r8 = *(u32 *)(r9 + 0);"
+		"%[sum] += r8;"
+
+		/* r8 = p->b */
+		"r8 = *(u32 *)(r9 + 4);"
+		"%[sum] += r8;"
+
+		"r9 += 8;"
+		/* r9 = p->a */
+		"r9 = *(u32 *)(r9 - 8);"
+		"%[sum] += r9;"
+
+		: [sum] "=r"(sum)
+		: [p] "r"(p)
+		: "r8", "r9"
+	);
+
+	total_sum = sum;
+	return 0;
+release_out:
+	bpf_kfunc_call_test_release(p);
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 5fbf8c24b66d8d63fd6a452fc60e7e11d98ac5f6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 22 Dec 2022 15:30:58 +0100
Subject: selftests/bpf: Add jit probe_mem corner case tests to s390x denylist

BPF CI fails for s390x with the following result:

  [...]
  All error logs:

  libbpf: prog 'test_jit_probe_mem': BPF program load failed: ERROR: strerror_r(524)=22
  libbpf: prog 'test_jit_probe_mem':
    -- BEGIN PROG LOAD LOG --
    JIT does not support calling kernel function
    processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
    -- END PROG LOAD LOG --
  libbpf: prog 'test_jit_probe_mem': failed to load: -524
  libbpf: failed to load object 'jit_probe_mem'
  libbpf: failed to load BPF skeleton 'jit_probe_mem': -524
  test_jit_probe_mem:FAIL:jit_probe_mem__open_and_load unexpected error: -524
  #89      jit_probe_mem:FAIL
  [...]

Add the test to the deny list.

Fixes: 59fe41b5255f ("selftests/bpf: Add verifier test exercising jit PROBE_MEM logic")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 585fcf73c731..3efe091255bf 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -26,6 +26,7 @@ get_func_args_test	                 # trampoline
 get_func_ip_test                         # get_func_ip_test__attach unexpected error: -524                             (trampoline)
 get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
 htab_update                              # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
+jit_probe_mem                            # jit_probe_mem__open_and_load unexpected error: -524                         (kfunc)
 kfree_skb                                # attach fentry unexpected error: -524                                        (trampoline)
 kfunc_call                               # 'bpf_prog_active': not found in kernel BTF                                  (?)
 kfunc_dynptr_param                       # JIT does not support calling kernel function                                (kfunc)
-- 
cgit v1.2.3


From 4a753ca5013d465694d6639fbe1378831a399bda Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Fri, 6 Jan 2023 10:57:24 -0800
Subject: selftest: mptcp: exit from copyfd_io_poll() when receive SIGUSR1

For now, mptcp_connect won't exit after receiving the 'SIGUSR1' signal
if '-r' is set. Fix this by skipping poll and sleep in copyfd_io_poll()
if 'quit' is set.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 8a8266957bc5..b25a31445ded 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -627,7 +627,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
 		char rbuf[8192];
 		ssize_t len;
 
-		if (fds.events == 0)
+		if (fds.events == 0 || quit)
 			break;
 
 		switch (poll(&fds, 1, poll_timeout)) {
@@ -733,7 +733,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
 	}
 
 	/* leave some time for late join/announce */
-	if (cfg_remove)
+	if (cfg_remove && !quit)
 		usleep(cfg_wait);
 
 	return 0;
-- 
cgit v1.2.3


From e04a30f788091076ff874ede84d33817d50d5937 Mon Sep 17 00:00:00 2001
From: Menglong Dong <imagedong@tencent.com>
Date: Fri, 6 Jan 2023 10:57:25 -0800
Subject: selftest: mptcp: add test for mptcp socket in use

Add the function chk_msk_inuse() to diag.sh, which is used to check the
statistics of mptcp socket in use. As mptcp socket in listen state will
be closed randomly after 'accept', we need to get the count of listening
mptcp socket through 'ss' command.

All tests pass.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Menglong Dong <imagedong@tencent.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/diag.sh | 56 ++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 24bcd7b9bdb2..ef628b16fe9b 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -17,6 +17,11 @@ flush_pids()
 	sleep 1.1
 
 	ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
+
+	for _ in $(seq 10); do
+		[ -z "$(ip netns pids "${ns}")" ] && break
+		sleep 0.1
+	done
 }
 
 cleanup()
@@ -37,15 +42,20 @@ if [ $? -ne 0 ];then
 	exit $ksft_skip
 fi
 
+get_msk_inuse()
+{
+	ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}'
+}
+
 __chk_nr()
 {
-	local condition="$1"
+	local command="$1"
 	local expected=$2
 	local msg nr
 
 	shift 2
 	msg=$*
-	nr=$(ss -inmHMN $ns | $condition)
+	nr=$(eval $command)
 
 	printf "%-50s" "$msg"
 	if [ $nr != $expected ]; then
@@ -57,9 +67,17 @@ __chk_nr()
 	test_cnt=$((test_cnt+1))
 }
 
+__chk_msk_nr()
+{
+	local condition=$1
+	shift 1
+
+	__chk_nr "ss -inmHMN $ns | $condition" $*
+}
+
 chk_msk_nr()
 {
-	__chk_nr "grep -c token:" $*
+	__chk_msk_nr "grep -c token:" $*
 }
 
 wait_msk_nr()
@@ -97,12 +115,12 @@ wait_msk_nr()
 
 chk_msk_fallback_nr()
 {
-		__chk_nr "grep -c fallback" $*
+		__chk_msk_nr "grep -c fallback" $*
 }
 
 chk_msk_remote_key_nr()
 {
-		__chk_nr "grep -c remote_key" $*
+		__chk_msk_nr "grep -c remote_key" $*
 }
 
 __chk_listen()
@@ -142,6 +160,26 @@ chk_msk_listen()
 	nr=$(ss -Ml $filter | wc -l)
 }
 
+chk_msk_inuse()
+{
+	local expected=$1
+	local listen_nr
+
+	shift 1
+
+	listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN)
+	expected=$((expected + listen_nr))
+
+	for _ in $(seq 10); do
+		if [ $(get_msk_inuse) -eq $expected ];then
+			break
+		fi
+		sleep 0.1
+	done
+
+	__chk_nr get_msk_inuse $expected $*
+}
+
 # $1: ns, $2: port
 wait_local_port_listen()
 {
@@ -195,8 +233,10 @@ wait_connected $ns 10000
 chk_msk_nr 2 "after MPC handshake "
 chk_msk_remote_key_nr 2 "....chk remote_key"
 chk_msk_fallback_nr 0 "....chk no fallback"
+chk_msk_inuse 2 "....chk 2 msk in use"
 flush_pids
 
+chk_msk_inuse 0 "....chk 0 msk in use after flush"
 
 echo "a" | \
 	timeout ${timeout_test} \
@@ -211,8 +251,11 @@ echo "b" | \
 				127.0.0.1 >/dev/null &
 wait_connected $ns 10001
 chk_msk_fallback_nr 1 "check fallback"
+chk_msk_inuse 1 "....chk 1 msk in use"
 flush_pids
 
+chk_msk_inuse 0 "....chk 0 msk in use after flush"
+
 NR_CLIENTS=100
 for I in `seq 1 $NR_CLIENTS`; do
 	echo "a" | \
@@ -232,6 +275,9 @@ for I in `seq 1 $NR_CLIENTS`; do
 done
 
 wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use"
 flush_pids
 
+chk_msk_inuse 0 "....chk 0 msk in use after flush"
+
 exit $ret
-- 
cgit v1.2.3


From 2d0b2ae2871ae6d42a9f0a4280e0fb5bff8d38b8 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:12 +0100
Subject: selftests/xsk: print correct payload for packet dump

Print the correct payload when the packet dump option is selected. The
network to host conversion was forgotten and the payload was
erronously declared to be an int instead of an unsigned int.

Fixes: facb7cb2e909 ("selftests/bpf: Xsk selftests - SKB POLL, NOPOLL")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-2-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xskxceiver.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 162d3a516f2c..2ff43b22180f 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -767,7 +767,7 @@ static void pkt_dump(void *pkt, u32 len)
 	struct ethhdr *ethhdr;
 	struct udphdr *udphdr;
 	struct iphdr *iphdr;
-	int payload, i;
+	u32 payload, i;
 
 	ethhdr = pkt;
 	iphdr = pkt + sizeof(*ethhdr);
@@ -792,7 +792,7 @@ static void pkt_dump(void *pkt, u32 len)
 	fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
 	fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
 	/*extract L5 frame */
-	payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
+	payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE)));
 
 	fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
 	fprintf(stdout, "---------------------------------------\n");
-- 
cgit v1.2.3


From 5adaf52776a4178434c0670a8e05e529a2b4aafa Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:13 +0100
Subject: selftests/xsk: do not close unused file descriptors

Do not close descriptors that have never been used. File descriptor
fields that are not in use are erroneously marked with the number 0,
which is a valid fd. Mark unused fds with -1 instead and do not close
these when deleting the socket.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-3-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xsk.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index 39d349509ba4..5e4a6552ed37 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -35,6 +35,8 @@
 #include "xsk.h"
 #include "bpf_util.h"
 
+#define FD_NOT_USED (-1)
+
 #ifndef SOL_XDP
  #define SOL_XDP 283
 #endif
@@ -583,6 +585,9 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
 {
 	struct xsk_ctx *ctx = xsk->ctx;
 
+	if (ctx->xsks_map_fd == FD_NOT_USED)
+		return;
+
 	bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
 	close(ctx->xsks_map_fd);
 }
@@ -941,6 +946,9 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	ctx->umem = umem;
 	ctx->queue_id = queue_id;
 	bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
+	ctx->prog_fd = FD_NOT_USED;
+	ctx->link_fd = FD_NOT_USED;
+	ctx->xsks_map_fd = FD_NOT_USED;
 
 	ctx->fill = fill;
 	ctx->comp = comp;
@@ -1221,8 +1229,9 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 
 	if (ctx->refcount == 1) {
 		xsk_delete_bpf_maps(xsk);
-		close(ctx->prog_fd);
-		if (ctx->has_bpf_link)
+		if (ctx->prog_fd != FD_NOT_USED)
+			close(ctx->prog_fd);
+		if (ctx->has_bpf_link && ctx->link_fd != FD_NOT_USED)
 			close(ctx->link_fd);
 	}
 
-- 
cgit v1.2.3


From 1e04f23bccf967e00609ab3b210961db7a1886e4 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:14 +0100
Subject: selftests/xsk: submit correct number of frames in populate_fill_ring

Submit the correct number of frames in the function
xsk_populate_fill_ring(). For the tests that set the flag
use_addr_for_fill, uninitialized buffers were sent to the fill ring
following the correct ones. This has no impact on the tests, since
they only use the ones that were initialized. But for correctness,
this should be fixed.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-4-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xskxceiver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 2ff43b22180f..a239e975ab66 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1272,7 +1272,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream
 
 		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
 	}
-	xsk_ring_prod__submit(&umem->fq, buffers_to_fill);
+	xsk_ring_prod__submit(&umem->fq, i);
 }
 
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
-- 
cgit v1.2.3


From 085dcccfb7d3dc52ed708fc588587f319541bc83 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:15 +0100
Subject: selftests/xsk: print correct error codes when exiting

Print the correct error codes when exiting the test suite due to some
terminal error. Some of these had a switched sign and some of them
printed zero instead of errno.

Fixes: facb7cb2e909 ("selftests/bpf: Xsk selftests - SKB POLL, NOPOLL")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-5-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xskxceiver.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index a239e975ab66..72578cebfbf7 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -350,7 +350,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
 	umem = calloc(1, sizeof(struct xsk_umem_info));
 	if (!umem) {
 		munmap(bufs, umem_sz);
-		exit_with_error(-ENOMEM);
+		exit_with_error(ENOMEM);
 	}
 	umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 	ret = xsk_configure_umem(umem, bufs, umem_sz);
@@ -936,7 +936,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
 		if (ifobj->use_poll) {
 			ret = poll(fds, 1, POLL_TMOUT);
 			if (ret < 0)
-				exit_with_error(-ret);
+				exit_with_error(errno);
 
 			if (!ret) {
 				if (!is_umem_valid(test->ifobj_tx))
@@ -963,7 +963,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds)
 				if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
 					ret = poll(fds, 1, POLL_TMOUT);
 					if (ret < 0)
-						exit_with_error(-ret);
+						exit_with_error(errno);
 				}
 				ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 			}
@@ -1015,7 +1015,7 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd
 			if (timeout) {
 				if (ret < 0) {
 					ksft_print_msg("ERROR: [%s] Poll error %d\n",
-						       __func__, ret);
+						       __func__, errno);
 					return TEST_FAILURE;
 				}
 				if (ret == 0)
@@ -1024,7 +1024,7 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd
 			}
 			if (ret <= 0) {
 				ksft_print_msg("ERROR: [%s] Poll error %d\n",
-					       __func__, ret);
+					       __func__, errno);
 				return TEST_FAILURE;
 			}
 		}
@@ -1323,18 +1323,18 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) {
 		if (opts.attach_mode != XDP_ATTACHED_SKB) {
 			ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n");
-			exit_with_error(-EINVAL);
+			exit_with_error(EINVAL);
 		}
 	} else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) {
 		if (opts.attach_mode != XDP_ATTACHED_DRV) {
 			ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n");
-			exit_with_error(-EINVAL);
+			exit_with_error(EINVAL);
 		}
 	}
 
 	ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
 	if (ret)
-		exit_with_error(-ret);
+		exit_with_error(errno);
 }
 
 static void *worker_testapp_validate_tx(void *arg)
@@ -1541,7 +1541,7 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj
 
 	ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd);
 	if (ret)
-		exit_with_error(-ret);
+		exit_with_error(errno);
 }
 
 static void testapp_bpf_res(struct test_spec *test)
-- 
cgit v1.2.3


From a4ca62277b6aeda31dfd686740462982b6a480b9 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:16 +0100
Subject: selftests/xsk: remove unused variable outstanding_tx

Remove the unused variable outstanding_tx.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-6-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xsk.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index 5e4a6552ed37..b166edfff86d 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -86,7 +86,6 @@ struct xsk_ctx {
 struct xsk_socket {
 	struct xsk_ring_cons *rx;
 	struct xsk_ring_prod *tx;
-	__u64 outstanding_tx;
 	struct xsk_ctx *ctx;
 	struct xsk_socket_config config;
 	int fd;
@@ -1021,7 +1020,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	if (err)
 		goto out_xsk_alloc;
 
-	xsk->outstanding_tx = 0;
 	ifindex = if_nametoindex(ifname);
 	if (!ifindex) {
 		err = -errno;
-- 
cgit v1.2.3


From 703bfd37101310ad5a6be09d5ff38c0e949ee8db Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:17 +0100
Subject: selftests/xsk: add debug option for creating netdevs

Add a new option to the test_xsk.sh script that only creates the two
veth netdevs and the extra namespace, then exits without running any
tests. The failed test can then be executed in the debugger without
having to create the netdevs and namespace manually. For ease-of-use,
the veth netdevs to use are printed so they can be copied into the
debugger.

Here is an example how to use it:

> sudo ./test_xsk.sh -d

veth10 veth11

> gdb xskxceiver

In gdb:

run -i veth10 -i veth11

And now the test cases can be debugged with gdb.

If you want to debug the test suite on a real NIC in loopback mode,
there is no need to use this feature as you already know the netdev of
your NIC.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-7-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xsk.sh | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index d821fd098504..cb315d85148b 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -74,6 +74,9 @@
 # Run and dump packet contents:
 #   sudo ./test_xsk.sh -D
 #
+# Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger:
+#   sudo ./test_xsk.sh -d
+#
 # Run test suite for physical device in loopback mode
 #   sudo ./test_xsk.sh -i IFACE
 
@@ -81,11 +84,12 @@
 
 ETH=""
 
-while getopts "vDi:" flag
+while getopts "vDi:d" flag
 do
 	case "${flag}" in
 		v) verbose=1;;
 		D) dump_pkts=1;;
+		d) debug=1;;
 		i) ETH=${OPTARG};;
 	esac
 done
@@ -174,6 +178,11 @@ statusList=()
 
 TEST_NAME="XSK_SELFTESTS_${VETH0}_SOFTIRQ"
 
+if [[ $debug -eq 1 ]]; then
+    echo "-i" ${VETH0} "-i" ${VETH1},${NS1}
+    exit
+fi
+
 exec_xskxceiver
 
 if [ -z $ETH ]; then
-- 
cgit v1.2.3


From efe620e5ba03330a71a85b40e68ee1b497c789ed Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:18 +0100
Subject: selftests/xsk: replace asm acquire/release implementations

Replace our own homegrown assembly store/release and load/acquire
implementations with the HW agnositic atomic APIs C11 offers. This to
make the code more portable, easier to read, and reduce the
maintenance burden.

The original code used load-acquire and store-release barriers
hand-coded in assembly. Since C11, these kind of operations are
offered as built-ins in gcc and llvm. The load-acquire operation
prevents hoisting of non-atomic memory operations to before this
operation and it corresponds to the __ATOMIC_ACQUIRE operation in the
built-in atomics. The store-release operation prevents hoisting of
non-atomic memory operations to after this operation and it
corresponds to the __ATOMIC_RELEASE operation in the built-in atomics.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-8-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xsk.h | 80 ++-------------------------------------
 1 file changed, 4 insertions(+), 76 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 997723b0bfb2..24ee765aded3 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -23,77 +23,6 @@
 extern "C" {
 #endif
 
-/* This whole API has been deprecated and moved to libxdp that can be found at
- * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so
- * it should just be linking with libxdp instead of libbpf for this set of
- * functionality. If not, please submit a bug report on the aforementioned page.
- */
-
-/* Load-Acquire Store-Release barriers used by the XDP socket
- * library. The following macros should *NOT* be considered part of
- * the xsk.h API, and is subject to change anytime.
- *
- * LIBRARY INTERNAL
- */
-
-#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
-#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
-
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile("" : : : "memory");			\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile("" : : : "memory");			\
-		___p1;							\
-	})
-#elif defined(__aarch64__)
-# define libbpf_smp_store_release(p, v)					\
-		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1;					\
-		asm volatile ("ldar %w0, %1"				\
-			      : "=r" (___p1) : "Q" (*p) : "memory");	\
-		___p1;							\
-	})
-#elif defined(__riscv)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile ("fence rw,w" : : : "memory");		\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile ("fence r,rw" : : : "memory");		\
-		___p1;							\
-	})
-#endif
-
-#ifndef libbpf_smp_store_release
-#define libbpf_smp_store_release(p, v)					\
-	do {								\
-		__sync_synchronize();					\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-#endif
-
-#ifndef libbpf_smp_load_acquire
-#define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		__sync_synchronize();					\
-		___p1;							\
-	})
-#endif
-
-/* LIBRARY INTERNAL -- END */
-
 /* Do not access these members directly. Use the functions below. */
 #define DEFINE_XSK_RING(name) \
 struct name { \
@@ -168,7 +97,7 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
 	 * this function. Without this optimization it whould have been
 	 * free_entries = r->cached_prod - r->cached_cons + r->size.
 	 */
-	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+	r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
 	r->cached_cons += r->size;
 
 	return r->cached_cons - r->cached_prod;
@@ -179,7 +108,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 	__u32 entries = r->cached_prod - r->cached_cons;
 
 	if (entries == 0) {
-		r->cached_prod = libbpf_smp_load_acquire(r->producer);
+		r->cached_prod = __atomic_load_n(r->producer, __ATOMIC_ACQUIRE);
 		entries = r->cached_prod - r->cached_cons;
 	}
 
@@ -202,7 +131,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
 	 */
-	libbpf_smp_store_release(prod->producer, *prod->producer + nb);
+	__atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);
 }
 
 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -227,8 +156,7 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
 	 */
-	libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
-
+	__atomic_store_n(cons->consumer, *cons->consumer + nb, __ATOMIC_RELEASE);
 }
 
 static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
-- 
cgit v1.2.3


From 64aef77d750ede548699d81c959936289d7d7819 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:19 +0100
Subject: selftests/xsk: remove namespaces

Remove the namespaces used as they fill no function. This will
simplify the code for speeding up the tests in the following commits.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-9-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_xsk.sh    | 33 ++++++++-------------
 tools/testing/selftests/bpf/xsk_prereqs.sh | 12 ++------
 tools/testing/selftests/bpf/xskxceiver.c   | 46 ++++--------------------------
 tools/testing/selftests/bpf/xskxceiver.h   |  3 --
 4 files changed, 20 insertions(+), 74 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index cb315d85148b..b077cf58f825 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -24,8 +24,6 @@
 #      -----------     |     ----------
 #      |  vethX  | --------- |  vethY |
 #      -----------   peer    ----------
-#           |          |          |
-#      namespaceX      |     namespaceY
 #
 # AF_XDP is an address family optimized for high performance packet processing,
 # it is XDP’s user-space interface.
@@ -39,10 +37,9 @@
 # Prerequisites setup by script:
 #
 #   Set up veth interfaces as per the topology shown ^^:
-#   * setup two veth interfaces and one namespace
-#   ** veth<xxxx> in root namespace
-#   ** veth<yyyy> in af_xdp<xxxx> namespace
-#   ** namespace af_xdp<xxxx>
+#   * setup two veth interfaces
+#   ** veth<xxxx>
+#   ** veth<yyyy>
 #   *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
 #       conflict with any existing interface
 #   * tests the veth and xsk layers of the topology
@@ -103,28 +100,25 @@ VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head -
 VETH0=ve${VETH0_POSTFIX}
 VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
 VETH1=ve${VETH1_POSTFIX}
-NS0=root
-NS1=af_xdp${VETH1_POSTFIX}
 MTU=1500
 
 trap ctrl_c INT
 
 function ctrl_c() {
-        cleanup_exit ${VETH0} ${VETH1} ${NS1}
+        cleanup_exit ${VETH0} ${VETH1}
 	exit 1
 }
 
 setup_vethPairs() {
 	if [[ $verbose -eq 1 ]]; then
-	        echo "setting up ${VETH0}: namespace: ${NS0}"
+	        echo "setting up ${VETH0}"
 	fi
-	ip netns add ${NS1}
 	ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
 	if [ -f /proc/net/if_inet6 ]; then
 		echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
 	fi
 	if [[ $verbose -eq 1 ]]; then
-	        echo "setting up ${VETH1}: namespace: ${NS1}"
+	        echo "setting up ${VETH1}"
 	fi
 
 	if [[ $busy_poll -eq 1 ]]; then
@@ -134,18 +128,15 @@ setup_vethPairs() {
 		echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout
 	fi
 
-	ip link set ${VETH1} netns ${NS1}
-	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
+	ip link set ${VETH1} mtu ${MTU}
 	ip link set ${VETH0} mtu ${MTU}
-	ip netns exec ${NS1} ip link set ${VETH1} up
-	ip netns exec ${NS1} ip link set dev lo up
+	ip link set ${VETH1} up
 	ip link set ${VETH0} up
 }
 
 if [ ! -z $ETH ]; then
 	VETH0=${ETH}
 	VETH1=${ETH}
-	NS1=""
 else
 	validate_root_exec
 	validate_veth_support ${VETH0}
@@ -155,7 +146,7 @@ else
 	retval=$?
 	if [ $retval -ne 0 ]; then
 		test_status $retval "${TEST_NAME}"
-		cleanup_exit ${VETH0} ${VETH1} ${NS1}
+		cleanup_exit ${VETH0} ${VETH1}
 		exit $retval
 	fi
 fi
@@ -179,14 +170,14 @@ statusList=()
 TEST_NAME="XSK_SELFTESTS_${VETH0}_SOFTIRQ"
 
 if [[ $debug -eq 1 ]]; then
-    echo "-i" ${VETH0} "-i" ${VETH1},${NS1}
+    echo "-i" ${VETH0} "-i" ${VETH1}
     exit
 fi
 
 exec_xskxceiver
 
 if [ -z $ETH ]; then
-	cleanup_exit ${VETH0} ${VETH1} ${NS1}
+	cleanup_exit ${VETH0} ${VETH1}
 fi
 TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
 busy_poll=1
@@ -199,7 +190,7 @@ exec_xskxceiver
 ## END TESTS
 
 if [ -z $ETH ]; then
-	cleanup_exit ${VETH0} ${VETH1} ${NS1}
+	cleanup_exit ${VETH0} ${VETH1}
 fi
 
 failures=0
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index a0b71723a818..ae697a10a056 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -55,21 +55,13 @@ test_exit()
 
 clear_configs()
 {
-	if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
-		[ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
-			{ ip netns exec $3 ip link del $2; }
-		ip netns del $3
-	fi
-	#Once we delete a veth pair node, the entire veth pair is removed,
-	#this is just to be cautious just incase the NS does not exist then
-	#veth node inside NS won't get removed so we explicitly remove it
 	[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
 		{ ip link del $1; }
 }
 
 cleanup_exit()
 {
-	clear_configs $1 $2 $3
+	clear_configs $1 $2
 }
 
 validate_ip_utility()
@@ -83,7 +75,7 @@ exec_xskxceiver()
 	        ARGS+="-b "
 	fi
 
-	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${ARGS}
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1} ${ARGS}
 
 	retval=$?
 	test_status $retval "${TEST_NAME}"
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 72578cebfbf7..4c8f32e1c431 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -55,12 +55,11 @@
  * Flow:
  * -----
  * - Single process spawns two threads: Tx and Rx
- * - Each of these two threads attach to a veth interface within their assigned
- *   namespaces
- * - Each thread Creates one AF_XDP socket connected to a unique umem for each
+ * - Each of these two threads attach to a veth interface
+ * - Each thread creates one AF_XDP socket connected to a unique umem for each
  *   veth interface
- * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy>
- * - Rx thread verifies if all 10k packets were received and delivered in-order,
+ * - Tx thread Transmits a number of packets from veth<xxxx> to veth<yyyy>
+ * - Rx thread verifies if all packets were received and delivered in-order,
  *   and have the right content
  *
  * Enable/disable packet dump mode:
@@ -399,28 +398,6 @@ static void usage(const char *prog)
 	ksft_print_msg(str, prog);
 }
 
-static int switch_namespace(const char *nsname)
-{
-	char fqns[26] = "/var/run/netns/";
-	int nsfd;
-
-	if (!nsname || strlen(nsname) == 0)
-		return -1;
-
-	strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1);
-	nsfd = open(fqns, O_RDONLY);
-
-	if (nsfd == -1)
-		exit_with_error(errno);
-
-	if (setns(nsfd, 0) == -1)
-		exit_with_error(errno);
-
-	print_verbose("NS switched: %s\n", nsname);
-
-	return nsfd;
-}
-
 static bool validate_interface(struct ifobject *ifobj)
 {
 	if (!strcmp(ifobj->ifname, ""))
@@ -438,8 +415,6 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 	opterr = 0;
 
 	for (;;) {
-		char *sptr, *token;
-
 		c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index);
 		if (c == -1)
 			break;
@@ -453,11 +428,8 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 			else
 				break;
 
-			sptr = strndupa(optarg, strlen(optarg));
-			memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
-			token = strsep(&sptr, ",");
-			if (token)
-				memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS);
+			memcpy(ifobj->ifname, optarg,
+			       min_t(size_t, MAX_INTERFACE_NAME_CHARS, strlen(optarg)));
 			interface_nb++;
 			break;
 		case 'D':
@@ -1283,8 +1255,6 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	int ret, ifindex;
 	void *bufs;
 
-	ifobject->ns_fd = switch_namespace(ifobject->nsname);
-
 	if (ifobject->umem->unaligned_mode)
 		mmap_flags |= MAP_HUGETLB;
 
@@ -1843,8 +1813,6 @@ static struct ifobject *ifobject_create(void)
 	if (!ifobj->umem)
 		goto out_umem;
 
-	ifobj->ns_fd = -1;
-
 	return ifobj;
 
 out_umem:
@@ -1856,8 +1824,6 @@ out_xsk_arr:
 
 static void ifobject_delete(struct ifobject *ifobj)
 {
-	if (ifobj->ns_fd != -1)
-		close(ifobj->ns_fd);
 	free(ifobj->umem);
 	free(ifobj->xsk_arr);
 	free(ifobj);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index edb76d2def9f..dcb908f5bb4c 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -30,7 +30,6 @@
 #define TEST_CONTINUE 1
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_INTERFACES_NAMESPACE_CHARS 16
 #define MAX_SOCKETS 2
 #define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10
@@ -133,14 +132,12 @@ typedef void *(*thread_func_t)(void *arg);
 
 struct ifobject {
 	char ifname[MAX_INTERFACE_NAME_CHARS];
-	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
 	struct xsk_socket_info *xsk;
 	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
 	thread_func_t func_ptr;
 	validation_func_t validation_func;
 	struct pkt_stream *pkt_stream;
-	int ns_fd;
 	int xsk_map_fd;
 	u32 dst_ip;
 	u32 src_ip;
-- 
cgit v1.2.3


From aa61d81f397c65b9951b0da2538657154564564f Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:20 +0100
Subject: selftests/xsk: load and attach XDP program only once per mode

Load and attach the XDP program only once per XDP mode that is being
executed. Today, the XDP program is loaded and attached for every
test, then unloaded, which takes a long time on real NICs, since they
have to reconfigure their HW, in contrast to veth. The test suite now
completes in 21 seconds, instead of 207 seconds previously on my
machine. This is a speed-up of around 10x.

This is accomplished by moving the XDP loading from the worker threads
to the main thread and replacing the XDP loading interfaces of xsk.c
that was taken from the xsk support in libbpf, with something more
explicit that is more useful for these tests. Instead, the relevant
file descriptors and ifindexes are just passed down to the new
functions.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-10-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xsk.c        |  92 ++++++++++++-------
 tools/testing/selftests/bpf/xsk.h        |   7 +-
 tools/testing/selftests/bpf/xskxceiver.c | 147 +++++++++++++++++++------------
 tools/testing/selftests/bpf/xskxceiver.h |   3 +
 4 files changed, 162 insertions(+), 87 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index b166edfff86d..1dd953541812 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -51,6 +51,8 @@
 
 #define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
 
+#define XSKMAP_SIZE 1
+
 enum xsk_prog {
 	XSK_PROG_FALLBACK,
 	XSK_PROG_REDIRECT_FLAGS,
@@ -387,10 +389,9 @@ static enum xsk_prog get_xsk_prog(void)
 	return detected;
 }
 
-static int xsk_load_xdp_prog(struct xsk_socket *xsk)
+static int __xsk_load_xdp_prog(int xsk_map_fd)
 {
 	static const int log_buf_size = 16 * 1024;
-	struct xsk_ctx *ctx = xsk->ctx;
 	char log_buf[log_buf_size];
 	int prog_fd;
 
@@ -418,7 +419,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* *(u32 *)(r10 - 4) = r2 */
 		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
 		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
 		/* r3 = XDP_PASS */
 		BPF_MOV64_IMM(BPF_REG_3, 2),
 		/* call bpf_redirect_map */
@@ -430,7 +431,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* r2 += -4 */
 		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
 		/* call bpf_map_lookup_elem */
 		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 		/* r1 = r0 */
@@ -442,7 +443,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* r2 = *(u32 *)(r10 - 4) */
 		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
 		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
 		/* r3 = 0 */
 		BPF_MOV64_IMM(BPF_REG_3, 0),
 		/* call bpf_redirect_map */
@@ -461,7 +462,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		/* r2 = *(u32 *)(r1 + 16) */
 		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
 		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
 		/* r3 = XDP_PASS */
 		BPF_MOV64_IMM(BPF_REG_3, 2),
 		/* call bpf_redirect_map */
@@ -480,13 +481,40 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 
 	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
 				progs[option], insns_cnt[option], &opts);
-	if (prog_fd < 0) {
+	if (prog_fd < 0)
 		pr_warn("BPF log buffer:\n%s", log_buf);
-		return prog_fd;
+
+	return prog_fd;
+}
+
+int xsk_attach_xdp_program(int ifindex, int prog_fd, u32 xdp_flags)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	__u32 prog_id = 0;
+	int link_fd;
+	int err;
+
+	err = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
+	if (err) {
+		pr_warn("getting XDP prog id failed\n");
+		return err;
 	}
 
-	ctx->prog_fd = prog_fd;
-	return 0;
+	/* If there's a netlink-based XDP prog loaded on interface, bail out
+	 * and ask user to do the removal by himself
+	 */
+	if (prog_id) {
+		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
+		return -EINVAL;
+	}
+
+	opts.flags = xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
+
+	link_fd = bpf_link_create(prog_fd, ifindex, BPF_XDP, &opts);
+	if (link_fd < 0)
+		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
+
+	return link_fd;
 }
 
 static int xsk_create_bpf_link(struct xsk_socket *xsk)
@@ -775,7 +803,7 @@ static int xsk_init_xdp_res(struct xsk_socket *xsk,
 	if (err)
 		return err;
 
-	err = xsk_load_xdp_prog(xsk);
+	err = __xsk_load_xdp_prog(*xsks_map_fd);
 	if (err)
 		goto err_load_xdp_prog;
 
@@ -871,6 +899,22 @@ int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
 	return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
 }
 
+int xsk_load_xdp_program(int *xsk_map_fd, int *prog_fd)
+{
+	*xsk_map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", sizeof(int), sizeof(int),
+				     XSKMAP_SIZE, NULL);
+	if (*xsk_map_fd < 0)
+		return *xsk_map_fd;
+
+	*prog_fd = __xsk_load_xdp_prog(*xsk_map_fd);
+	if (*prog_fd < 0) {
+		close(*xsk_map_fd);
+		return *prog_fd;
+	}
+
+	return 0;
+}
+
 static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
 				   __u32 queue_id)
 {
@@ -917,7 +961,7 @@ out_free:
 
 static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 				      struct xsk_umem *umem, int ifindex,
-				      const char *ifname, __u32 queue_id,
+				      __u32 queue_id,
 				      struct xsk_ring_prod *fill,
 				      struct xsk_ring_cons *comp)
 {
@@ -944,7 +988,6 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	ctx->refcount = 1;
 	ctx->umem = umem;
 	ctx->queue_id = queue_id;
-	bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
 	ctx->prog_fd = FD_NOT_USED;
 	ctx->link_fd = FD_NOT_USED;
 	ctx->xsks_map_fd = FD_NOT_USED;
@@ -991,7 +1034,7 @@ int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
 }
 
 int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
-			      const char *ifname,
+			      int ifindex,
 			      __u32 queue_id, struct xsk_umem *umem,
 			      struct xsk_ring_cons *rx,
 			      struct xsk_ring_prod *tx,
@@ -1005,7 +1048,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	struct xdp_mmap_offsets off;
 	struct xsk_socket *xsk;
 	struct xsk_ctx *ctx;
-	int err, ifindex;
+	int err;
 
 	if (!umem || !xsk_ptr || !(rx || tx))
 		return -EFAULT;
@@ -1020,12 +1063,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	if (err)
 		goto out_xsk_alloc;
 
-	ifindex = if_nametoindex(ifname);
-	if (!ifindex) {
-		err = -errno;
-		goto out_xsk_alloc;
-	}
-
 	if (umem->refcount++ > 0) {
 		xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
 		if (xsk->fd < 0) {
@@ -1045,8 +1082,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			goto out_socket;
 		}
 
-		ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id,
-				     fill, comp);
+		ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
 		if (!ctx) {
 			err = -ENOMEM;
 			goto out_socket;
@@ -1144,12 +1180,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 		goto out_mmap_tx;
 	}
 
-	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
-		err = __xsk_setup_xdp_prog(xsk, NULL);
-		if (err)
-			goto out_mmap_tx;
-	}
-
 	*xsk_ptr = xsk;
 	umem->fill_save = NULL;
 	umem->comp_save = NULL;
@@ -1173,7 +1203,7 @@ out_xsk_alloc:
 	return err;
 }
 
-int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
 		       __u32 queue_id, struct xsk_umem *umem,
 		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
 		       const struct xsk_socket_config *usr_config)
@@ -1181,7 +1211,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	if (!umem)
 		return -EFAULT;
 
-	return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
+	return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
 					 rx, tx, umem->fill_save,
 					 umem->comp_save, usr_config);
 }
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 24ee765aded3..7a5aeacd261b 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -204,6 +204,9 @@ int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
 /* Flags for the libbpf_flags field. */
 #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
 
+int xsk_load_xdp_program(int *xsk_map_fd, int *prog_fd);
+int xsk_attach_xdp_program(int ifindex, int prog_fd, u32 xdp_flags);
+
 struct xsk_socket_config {
 	__u32 rx_size;
 	__u32 tx_size;
@@ -219,13 +222,13 @@ int xsk_umem__create(struct xsk_umem **umem,
 		     struct xsk_ring_cons *comp,
 		     const struct xsk_umem_config *config);
 int xsk_socket__create(struct xsk_socket **xsk,
-		       const char *ifname, __u32 queue_id,
+		       int ifindex, __u32 queue_id,
 		       struct xsk_umem *umem,
 		       struct xsk_ring_cons *rx,
 		       struct xsk_ring_prod *tx,
 		       const struct xsk_socket_config *config);
 int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
-			      const char *ifname,
+			      int ifindex,
 			      __u32 queue_id, struct xsk_umem *umem,
 			      struct xsk_ring_cons *rx,
 			      struct xsk_ring_prod *tx,
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 4c8f32e1c431..0c0974c209cd 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -268,6 +268,11 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
 	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
 }
 
+static u32 mode_to_xdp_flags(enum test_mode mode)
+{
+	return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+}
+
 static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size)
 {
 	struct xsk_umem_config cfg = {
@@ -329,7 +334,7 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
 
 	txr = ifobject->tx_on ? &xsk->tx : NULL;
 	rxr = ifobject->rx_on ? &xsk->rx : NULL;
-	return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg);
+	return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
 }
 
 static bool ifobj_zc_avail(struct ifobject *ifobject)
@@ -359,8 +364,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
 	xsk = calloc(1, sizeof(struct xsk_socket_info));
 	if (!xsk)
 		goto out;
-	ifobject->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-	ifobject->xdp_flags |= XDP_FLAGS_DRV_MODE;
+	ifobject->xdp_flags = XDP_FLAGS_DRV_MODE;
 	ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
 	ifobject->rx_on = true;
 	xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
@@ -430,6 +434,11 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 
 			memcpy(ifobj->ifname, optarg,
 			       min_t(size_t, MAX_INTERFACE_NAME_CHARS, strlen(optarg)));
+
+			ifobj->ifindex = if_nametoindex(ifobj->ifname);
+			if (!ifobj->ifindex)
+				exit_with_error(errno);
+
 			interface_nb++;
 			break;
 		case 'D':
@@ -510,12 +519,6 @@ static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 	for (i = 0; i < MAX_INTERFACES; i++) {
 		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
 
-		ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-		if (mode == TEST_MODE_SKB)
-			ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE;
-		else
-			ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE;
-
 		ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
 		if (mode == TEST_MODE_ZC)
 			ifobj->bind_flags |= XDP_ZEROCOPY;
@@ -1252,7 +1255,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
-	int ret, ifindex;
+	u32 queue_id = 0;
+	int ret, fd;
 	void *bufs;
 
 	if (ifobject->umem->unaligned_mode)
@@ -1278,31 +1282,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	if (!ifobject->rx_on)
 		return;
 
-	ifindex = if_nametoindex(ifobject->ifname);
-	if (!ifindex)
-		exit_with_error(errno);
-
-	ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd);
-	if (ret)
-		exit_with_error(-ret);
-
-	ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts);
-	if (ret)
-		exit_with_error(-ret);
-
-	if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) {
-		if (opts.attach_mode != XDP_ATTACHED_SKB) {
-			ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n");
-			exit_with_error(EINVAL);
-		}
-	} else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) {
-		if (opts.attach_mode != XDP_ATTACHED_DRV) {
-			ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n");
-			exit_with_error(EINVAL);
-		}
-	}
-
-	ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
+	fd = xsk_socket__fd(ifobject->xsk->xsk);
+	ret = bpf_map_update_elem(ifobject->xsk_map_fd, &queue_id, &fd, 0);
 	if (ret)
 		exit_with_error(errno);
 }
@@ -1336,15 +1317,19 @@ static void *worker_testapp_validate_rx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_rx;
+	int id = 0, err, fd = xsk_socket__fd(ifobject->xsk->xsk);
 	struct pollfd fds = { };
-	int id = 0;
-	int err;
+	u32 queue_id = 0;
 
 	if (test->current_step == 1) {
 		thread_common_ops(test, ifobject);
 	} else {
 		bpf_map_delete_elem(ifobject->xsk_map_fd, &id);
-		xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
+		err = bpf_map_update_elem(ifobject->xsk_map_fd, &queue_id, &fd, 0);
+		if (err) {
+			printf("Error: Failed to update xskmap, error %s\n", strerror(err));
+			exit_with_error(err);
+		}
 	}
 
 	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
@@ -1413,7 +1398,10 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct
 	pthread_join(t0, NULL);
 
 	if (test->total_steps == test->current_step || test->fail) {
+		u32 queue_id = 0;
+
 		xsk_socket__delete(ifobj->xsk->xsk);
+		bpf_map_delete_elem(ifobj->xsk_map_fd, &queue_id);
 		testapp_clean_xsk_umem(ifobj);
 	}
 
@@ -1502,14 +1490,14 @@ static void testapp_bidi(struct test_spec *test)
 
 static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
 {
-	int ret;
+	int ret, queue_id = 0, fd = xsk_socket__fd(ifobj_rx->xsk->xsk);
 
 	xsk_socket__delete(ifobj_tx->xsk->xsk);
 	xsk_socket__delete(ifobj_rx->xsk->xsk);
 	ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
 	ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
 
-	ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd);
+	ret = bpf_map_update_elem(ifobj_rx->xsk_map_fd, &queue_id, &fd, 0);
 	if (ret)
 		exit_with_error(errno);
 }
@@ -1673,8 +1661,9 @@ static void testapp_invalid_desc(struct test_spec *test)
 
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
 		       const char *dst_ip, const char *src_ip, const u16 dst_port,
-		       const u16 src_port, thread_func_t func_ptr)
+		       const u16 src_port, thread_func_t func_ptr, bool load_xdp)
 {
+	int xsk_map_fd, prog_fd, err;
 	struct in_addr ip;
 
 	memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
@@ -1690,6 +1679,24 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 	ifobj->src_port = src_port;
 
 	ifobj->func_ptr = func_ptr;
+
+	if (!load_xdp)
+		return;
+
+	err = xsk_load_xdp_program(&xsk_map_fd, &prog_fd);
+	if (err) {
+		printf("Error loading XDP program\n");
+		exit_with_error(err);
+	}
+
+	ifobj->xsk_map_fd = xsk_map_fd;
+	ifobj->prog_fd = prog_fd;
+	ifobj->xdp_flags = mode_to_xdp_flags(TEST_MODE_SKB);
+	ifobj->link_fd = xsk_attach_xdp_program(ifobj->ifindex, prog_fd, ifobj->xdp_flags);
+	if (ifobj->link_fd < 0) {
+		printf("Error attaching XDP program\n");
+		exit_with_error(ifobj->link_fd);
+	}
 }
 
 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
@@ -1824,12 +1831,15 @@ out_xsk_arr:
 
 static void ifobject_delete(struct ifobject *ifobj)
 {
+	close(ifobj->prog_fd);
+	close(ifobj->xsk_map_fd);
+
 	free(ifobj->umem);
 	free(ifobj->xsk_arr);
 	free(ifobj);
 }
 
-static bool is_xdp_supported(struct ifobject *ifobject)
+static bool is_xdp_supported(int ifindex)
 {
 	int flags = XDP_FLAGS_DRV_MODE;
 
@@ -1838,7 +1848,6 @@ static bool is_xdp_supported(struct ifobject *ifobject)
 		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
 		BPF_EXIT_INSN()
 	};
-	int ifindex = if_nametoindex(ifobject->ifname);
 	int prog_fd, insn_cnt = ARRAY_SIZE(insns);
 	int err;
 
@@ -1858,6 +1867,29 @@ static bool is_xdp_supported(struct ifobject *ifobject)
 	return true;
 }
 
+static void change_to_drv_mode(struct ifobject *ifobj)
+{
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+	int ret;
+
+	close(ifobj->link_fd);
+	ifobj->link_fd = xsk_attach_xdp_program(ifobj->ifindex, ifobj->prog_fd,
+						XDP_FLAGS_DRV_MODE);
+	if (ifobj->link_fd < 0) {
+		ksft_print_msg("Error attaching XDP program\n");
+		exit_with_error(-ifobj->link_fd);
+	}
+
+	ret = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &opts);
+	if (ret)
+		exit_with_error(errno);
+
+	if (opts.attach_mode != XDP_ATTACHED_DRV) {
+		ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+		exit_with_error(EINVAL);
+	}
+}
+
 int main(int argc, char **argv)
 {
 	struct pkt_stream *rx_pkt_stream_default;
@@ -1866,7 +1898,7 @@ int main(int argc, char **argv)
 	int modes = TEST_MODE_SKB + 1;
 	u32 i, j, failed_tests = 0;
 	struct test_spec test;
-	bool shared_umem;
+	bool shared_netdev;
 
 	/* Use libbpf 1.0 API mode */
 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
@@ -1881,27 +1913,27 @@ int main(int argc, char **argv)
 	setlocale(LC_ALL, "");
 
 	parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
-	shared_umem = !strcmp(ifobj_tx->ifname, ifobj_rx->ifname);
 
-	ifobj_tx->shared_umem = shared_umem;
-	ifobj_rx->shared_umem = shared_umem;
+	shared_netdev = (ifobj_tx->ifindex == ifobj_rx->ifindex);
+	ifobj_tx->shared_umem = shared_netdev;
+	ifobj_rx->shared_umem = shared_netdev;
 
 	if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
 		usage(basename(argv[0]));
 		ksft_exit_xfail();
 	}
 
-	init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
-		   worker_testapp_validate_tx);
-	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
-		   worker_testapp_validate_rx);
-
-	if (is_xdp_supported(ifobj_tx)) {
+	if (is_xdp_supported(ifobj_tx->ifindex)) {
 		modes++;
 		if (ifobj_zc_avail(ifobj_tx))
 			modes++;
 	}
 
+	init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
+		   worker_testapp_validate_rx, true);
+	init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
+		   worker_testapp_validate_tx, !shared_netdev);
+
 	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
 	tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
 	rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
@@ -1912,7 +1944,13 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(modes * TEST_TYPE_MAX);
 
-	for (i = 0; i < modes; i++)
+	for (i = 0; i < modes; i++) {
+		if (i == TEST_MODE_DRV) {
+			change_to_drv_mode(ifobj_rx);
+			if (!shared_netdev)
+				change_to_drv_mode(ifobj_tx);
+		}
+
 		for (j = 0; j < TEST_TYPE_MAX; j++) {
 			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
 			run_pkt_test(&test, i, j);
@@ -1921,6 +1959,7 @@ int main(int argc, char **argv)
 			if (test.fail)
 				failed_tests++;
 		}
+	}
 
 	pkt_stream_delete(tx_pkt_stream_default);
 	pkt_stream_delete(rx_pkt_stream_default);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index dcb908f5bb4c..b2ba877b1966 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -139,6 +139,9 @@ struct ifobject {
 	validation_func_t validation_func;
 	struct pkt_stream *pkt_stream;
 	int xsk_map_fd;
+	int prog_fd;
+	int link_fd;
+	int ifindex;
 	u32 dst_ip;
 	u32 src_ip;
 	u32 xdp_flags;
-- 
cgit v1.2.3


From 6b3c0821caa49538c49262b041bae59bad523c7c Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:21 +0100
Subject: selftests/xsk: remove unnecessary code in control path

Remove unnecessary code in the control path. This is located in the
file xsk.c that was moved from libbpf when the xsk support there was
deprecated. Some of the code there is not needed anymore as the
selftests are only guaranteed to run on the kernel it is shipped
with. Therefore, all the code that has to deal with compatibility of
older kernels can be dropped and also any other function that is not
of any use for the tests.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-11-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xsk.c        | 617 +------------------------------
 tools/testing/selftests/bpf/xsk.h        |   9 -
 tools/testing/selftests/bpf/xskxceiver.c |   8 -
 3 files changed, 3 insertions(+), 631 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index 1dd953541812..9ed31d280e48 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -35,8 +35,6 @@
 #include "xsk.h"
 #include "bpf_util.h"
 
-#define FD_NOT_USED (-1)
-
 #ifndef SOL_XDP
  #define SOL_XDP 283
 #endif
@@ -53,11 +51,6 @@
 
 #define XSKMAP_SIZE 1
 
-enum xsk_prog {
-	XSK_PROG_FALLBACK,
-	XSK_PROG_REDIRECT_FLAGS,
-};
-
 struct xsk_umem {
 	struct xsk_ring_prod *fill_save;
 	struct xsk_ring_cons *comp_save;
@@ -78,11 +71,6 @@ struct xsk_ctx {
 	int refcount;
 	int ifindex;
 	struct list_head list;
-	int prog_fd;
-	int link_fd;
-	int xsks_map_fd;
-	char ifname[IFNAMSIZ];
-	bool has_bpf_link;
 };
 
 struct xsk_socket {
@@ -93,27 +81,6 @@ struct xsk_socket {
 	int fd;
 };
 
-struct xsk_nl_info {
-	bool xdp_prog_attached;
-	int ifindex;
-	int fd;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_ring_offset_v1 {
-	__u64 producer;
-	__u64 consumer;
-	__u64 desc;
-};
-
-/* Up until and including Linux 5.3 */
-struct xdp_mmap_offsets_v1 {
-	struct xdp_ring_offset_v1 rx;
-	struct xdp_ring_offset_v1 tx;
-	struct xdp_ring_offset_v1 fr;
-	struct xdp_ring_offset_v1 cr;
-};
-
 int xsk_umem__fd(const struct xsk_umem *umem)
 {
 	return umem ? umem->fd : -EINVAL;
@@ -156,55 +123,17 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
 	if (!usr_cfg) {
 		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-		cfg->libbpf_flags = 0;
-		cfg->xdp_flags = 0;
 		cfg->bind_flags = 0;
 		return 0;
 	}
 
-	if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
-		return -EINVAL;
-
 	cfg->rx_size = usr_cfg->rx_size;
 	cfg->tx_size = usr_cfg->tx_size;
-	cfg->libbpf_flags = usr_cfg->libbpf_flags;
-	cfg->xdp_flags = usr_cfg->xdp_flags;
 	cfg->bind_flags = usr_cfg->bind_flags;
 
 	return 0;
 }
 
-static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
-{
-	struct xdp_mmap_offsets_v1 off_v1;
-
-	/* getsockopt on a kernel <= 5.3 has no flags fields.
-	 * Copy over the offsets to the correct places in the >=5.4 format
-	 * and put the flags where they would have been on that kernel.
-	 */
-	memcpy(&off_v1, off, sizeof(off_v1));
-
-	off->rx.producer = off_v1.rx.producer;
-	off->rx.consumer = off_v1.rx.consumer;
-	off->rx.desc = off_v1.rx.desc;
-	off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
-
-	off->tx.producer = off_v1.tx.producer;
-	off->tx.consumer = off_v1.tx.consumer;
-	off->tx.desc = off_v1.tx.desc;
-	off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
-
-	off->fr.producer = off_v1.fr.producer;
-	off->fr.consumer = off_v1.fr.consumer;
-	off->fr.desc = off_v1.fr.desc;
-	off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
-
-	off->cr.producer = off_v1.cr.producer;
-	off->cr.consumer = off_v1.cr.consumer;
-	off->cr.desc = off_v1.cr.desc;
-	off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
-}
-
 static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
 {
 	socklen_t optlen;
@@ -218,11 +147,6 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
 	if (optlen == sizeof(*off))
 		return 0;
 
-	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
-		xsk_mmap_offsets_v1(off);
-		return 0;
-	}
-
 	return -EINVAL;
 }
 
@@ -343,122 +267,19 @@ out_umem_alloc:
 	return err;
 }
 
-struct xsk_umem_config_v1 {
-	__u32 fill_size;
-	__u32 comp_size;
-	__u32 frame_size;
-	__u32 frame_headroom;
-};
-
-static enum xsk_prog get_xsk_prog(void)
-{
-	enum xsk_prog detected = XSK_PROG_FALLBACK;
-	char data_in = 0, data_out;
-	struct bpf_insn insns[] = {
-		BPF_LD_MAP_FD(BPF_REG_1, 0),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_MOV64_IMM(BPF_REG_3, XDP_PASS),
-		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-		BPF_EXIT_INSN(),
-	};
-	LIBBPF_OPTS(bpf_test_run_opts, opts,
-		.data_in = &data_in,
-		.data_size_in = 1,
-		.data_out = &data_out,
-	);
-
-	int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
-
-	map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
-	if (map_fd < 0)
-		return detected;
-
-	insns[0].imm = map_fd;
-
-	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
-	if (prog_fd < 0) {
-		close(map_fd);
-		return detected;
-	}
-
-	ret = bpf_prog_test_run_opts(prog_fd, &opts);
-	if (!ret && opts.retval == XDP_PASS)
-		detected = XSK_PROG_REDIRECT_FLAGS;
-	close(prog_fd);
-	close(map_fd);
-	return detected;
-}
-
 static int __xsk_load_xdp_prog(int xsk_map_fd)
 {
 	static const int log_buf_size = 16 * 1024;
 	char log_buf[log_buf_size];
 	int prog_fd;
 
-	/* This is the fallback C-program:
-	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
-	 * {
-	 *     int ret, index = ctx->rx_queue_index;
-	 *
-	 *     // A set entry here means that the correspnding queue_id
-	 *     // has an active AF_XDP socket bound to it.
-	 *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
-	 *     if (ret > 0)
-	 *         return ret;
-	 *
-	 *     // Fallback for pre-5.3 kernels, not supporting default
-	 *     // action in the flags parameter.
-	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
-	 *         return bpf_redirect_map(&xsks_map, index, 0);
-	 *     return XDP_PASS;
-	 * }
-	 */
-	struct bpf_insn prog[] = {
-		/* r2 = *(u32 *)(r1 + 16) */
-		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
-		/* *(u32 *)(r10 - 4) = r2 */
-		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
-		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
-		/* r3 = XDP_PASS */
-		BPF_MOV64_IMM(BPF_REG_3, 2),
-		/* call bpf_redirect_map */
-		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-		/* if w0 != 0 goto pc+13 */
-		BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
-		/* r2 = r10 */
-		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-		/* r2 += -4 */
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
-		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
-		/* call bpf_map_lookup_elem */
-		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
-		/* r1 = r0 */
-		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-		/* r0 = XDP_PASS */
-		BPF_MOV64_IMM(BPF_REG_0, 2),
-		/* if r1 == 0 goto pc+5 */
-		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
-		/* r2 = *(u32 *)(r10 - 4) */
-		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
-		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
-		/* r3 = 0 */
-		BPF_MOV64_IMM(BPF_REG_3, 0),
-		/* call bpf_redirect_map */
-		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-		/* The jumps are to this instruction */
-		BPF_EXIT_INSN(),
-	};
-
 	/* This is the post-5.3 kernel C-program:
 	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
 	 * {
 	 *     return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
 	 * }
 	 */
-	struct bpf_insn prog_redirect_flags[] = {
+	struct bpf_insn prog[] = {
 		/* r2 = *(u32 *)(r1 + 16) */
 		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
 		/* r1 = xskmap[] */
@@ -469,18 +290,14 @@ static int __xsk_load_xdp_prog(int xsk_map_fd)
 		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 		BPF_EXIT_INSN(),
 	};
-	size_t insns_cnt[] = {ARRAY_SIZE(prog),
-			      ARRAY_SIZE(prog_redirect_flags),
-	};
-	struct bpf_insn *progs[] = {prog, prog_redirect_flags};
-	enum xsk_prog option = get_xsk_prog();
+	size_t insns_cnt = ARRAY_SIZE(prog);
 	LIBBPF_OPTS(bpf_prog_load_opts, opts,
 		.log_buf = log_buf,
 		.log_size = log_buf_size,
 	);
 
 	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
-				progs[option], insns_cnt[option], &opts);
+				prog, insns_cnt, &opts);
 	if (prog_fd < 0)
 		pr_warn("BPF log buffer:\n%s", log_buf);
 
@@ -517,388 +334,6 @@ int xsk_attach_xdp_program(int ifindex, int prog_fd, u32 xdp_flags)
 	return link_fd;
 }
 
-static int xsk_create_bpf_link(struct xsk_socket *xsk)
-{
-	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
-	struct xsk_ctx *ctx = xsk->ctx;
-	__u32 prog_id = 0;
-	int link_fd;
-	int err;
-
-	err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
-	if (err) {
-		pr_warn("getting XDP prog id failed\n");
-		return err;
-	}
-
-	/* if there's a netlink-based XDP prog loaded on interface, bail out
-	 * and ask user to do the removal by himself
-	 */
-	if (prog_id) {
-		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
-		return -EINVAL;
-	}
-
-	opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
-
-	link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
-	if (link_fd < 0) {
-		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
-		return link_fd;
-	}
-
-	ctx->link_fd = link_fd;
-	return 0;
-}
-
-static int xsk_get_max_queues(struct xsk_socket *xsk)
-{
-	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
-	struct xsk_ctx *ctx = xsk->ctx;
-	struct ifreq ifr = {};
-	int fd, err, ret;
-
-	fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
-	if (fd < 0)
-		return -errno;
-
-	ifr.ifr_data = (void *)&channels;
-	bpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);
-	err = ioctl(fd, SIOCETHTOOL, &ifr);
-	if (err && errno != EOPNOTSUPP) {
-		ret = -errno;
-		goto out;
-	}
-
-	if (err) {
-		/* If the device says it has no channels, then all traffic
-		 * is sent to a single stream, so max queues = 1.
-		 */
-		ret = 1;
-	} else {
-		/* Take the max of rx, tx, combined. Drivers return
-		 * the number of channels in different ways.
-		 */
-		ret = max(channels.max_rx, channels.max_tx);
-		ret = max(ret, (int)channels.max_combined);
-	}
-
-out:
-	close(fd);
-	return ret;
-}
-
-static int xsk_create_bpf_maps(struct xsk_socket *xsk)
-{
-	struct xsk_ctx *ctx = xsk->ctx;
-	int max_queues;
-	int fd;
-
-	max_queues = xsk_get_max_queues(xsk);
-	if (max_queues < 0)
-		return max_queues;
-
-	fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
-			    sizeof(int), sizeof(int), max_queues, NULL);
-	if (fd < 0)
-		return fd;
-
-	ctx->xsks_map_fd = fd;
-
-	return 0;
-}
-
-static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
-{
-	struct xsk_ctx *ctx = xsk->ctx;
-
-	if (ctx->xsks_map_fd == FD_NOT_USED)
-		return;
-
-	bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id);
-	close(ctx->xsks_map_fd);
-}
-
-static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
-{
-	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
-	__u32 map_len = sizeof(struct bpf_map_info);
-	struct bpf_prog_info prog_info = {};
-	struct xsk_ctx *ctx = xsk->ctx;
-	struct bpf_map_info map_info;
-	int fd, err;
-
-	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
-	if (err)
-		return err;
-
-	num_maps = prog_info.nr_map_ids;
-
-	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
-	if (!map_ids)
-		return -ENOMEM;
-
-	memset(&prog_info, 0, prog_len);
-	prog_info.nr_map_ids = num_maps;
-	prog_info.map_ids = (__u64)(unsigned long)map_ids;
-
-	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);
-	if (err)
-		goto out_map_ids;
-
-	ctx->xsks_map_fd = -1;
-
-	for (i = 0; i < prog_info.nr_map_ids; i++) {
-		fd = bpf_map_get_fd_by_id(map_ids[i]);
-		if (fd < 0)
-			continue;
-
-		memset(&map_info, 0, map_len);
-		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
-		if (err) {
-			close(fd);
-			continue;
-		}
-
-		if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
-			ctx->xsks_map_fd = fd;
-			break;
-		}
-
-		close(fd);
-	}
-
-	if (ctx->xsks_map_fd == -1)
-		err = -ENOENT;
-
-out_map_ids:
-	free(map_ids);
-	return err;
-}
-
-static int xsk_set_bpf_maps(struct xsk_socket *xsk)
-{
-	struct xsk_ctx *ctx = xsk->ctx;
-
-	return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,
-				   &xsk->fd, 0);
-}
-
-static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
-{
-	struct bpf_link_info link_info;
-	__u32 link_len;
-	__u32 id = 0;
-	int err;
-	int fd;
-
-	while (true) {
-		err = bpf_link_get_next_id(id, &id);
-		if (err) {
-			if (errno == ENOENT) {
-				err = 0;
-				break;
-			}
-			pr_warn("can't get next link: %s\n", strerror(errno));
-			break;
-		}
-
-		fd = bpf_link_get_fd_by_id(id);
-		if (fd < 0) {
-			if (errno == ENOENT)
-				continue;
-			pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
-			err = -errno;
-			break;
-		}
-
-		link_len = sizeof(struct bpf_link_info);
-		memset(&link_info, 0, link_len);
-		err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
-		if (err) {
-			pr_warn("can't get link info: %s\n", strerror(errno));
-			close(fd);
-			break;
-		}
-		if (link_info.type == BPF_LINK_TYPE_XDP) {
-			if (link_info.xdp.ifindex == ifindex) {
-				*link_fd = fd;
-				if (prog_id)
-					*prog_id = link_info.prog_id;
-				break;
-			}
-		}
-		close(fd);
-	}
-
-	return err;
-}
-
-static bool xsk_probe_bpf_link(void)
-{
-	LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
-	struct bpf_insn insns[2] = {
-		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
-		BPF_EXIT_INSN()
-	};
-	int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);
-	int ifindex_lo = 1;
-	bool ret = false;
-	int err;
-
-	err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
-	if (err)
-		return ret;
-
-	if (link_fd >= 0)
-		return true;
-
-	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
-	if (prog_fd < 0)
-		return ret;
-
-	link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
-	close(prog_fd);
-
-	if (link_fd >= 0) {
-		ret = true;
-		close(link_fd);
-	}
-
-	return ret;
-}
-
-static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
-{
-	char ifname[IFNAMSIZ];
-	struct xsk_ctx *ctx;
-	char *interface;
-
-	ctx = calloc(1, sizeof(*ctx));
-	if (!ctx)
-		return -ENOMEM;
-
-	interface = if_indextoname(ifindex, &ifname[0]);
-	if (!interface) {
-		free(ctx);
-		return -errno;
-	}
-
-	ctx->ifindex = ifindex;
-	bpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);
-
-	xsk->ctx = ctx;
-	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
-
-	return 0;
-}
-
-static int xsk_init_xdp_res(struct xsk_socket *xsk,
-			    int *xsks_map_fd)
-{
-	struct xsk_ctx *ctx = xsk->ctx;
-	int err;
-
-	err = xsk_create_bpf_maps(xsk);
-	if (err)
-		return err;
-
-	err = __xsk_load_xdp_prog(*xsks_map_fd);
-	if (err)
-		goto err_load_xdp_prog;
-
-	if (ctx->has_bpf_link)
-		err = xsk_create_bpf_link(xsk);
-	else
-		err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd,
-				     xsk->config.xdp_flags, NULL);
-
-	if (err)
-		goto err_attach_xdp_prog;
-
-	if (!xsk->rx)
-		return err;
-
-	err = xsk_set_bpf_maps(xsk);
-	if (err)
-		goto err_set_bpf_maps;
-
-	return err;
-
-err_set_bpf_maps:
-	if (ctx->has_bpf_link)
-		close(ctx->link_fd);
-	else
-		bpf_xdp_detach(ctx->ifindex, 0, NULL);
-err_attach_xdp_prog:
-	close(ctx->prog_fd);
-err_load_xdp_prog:
-	xsk_delete_bpf_maps(xsk);
-	return err;
-}
-
-static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
-{
-	struct xsk_ctx *ctx = xsk->ctx;
-	int err;
-
-	ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
-	if (ctx->prog_fd < 0) {
-		err = -errno;
-		goto err_prog_fd;
-	}
-	err = xsk_lookup_bpf_maps(xsk);
-	if (err)
-		goto err_lookup_maps;
-
-	if (!xsk->rx)
-		return err;
-
-	err = xsk_set_bpf_maps(xsk);
-	if (err)
-		goto err_set_maps;
-
-	return err;
-
-err_set_maps:
-	close(ctx->xsks_map_fd);
-err_lookup_maps:
-	close(ctx->prog_fd);
-err_prog_fd:
-	if (ctx->has_bpf_link)
-		close(ctx->link_fd);
-	return err;
-}
-
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
-{
-	struct xsk_socket *xsk = _xdp;
-	struct xsk_ctx *ctx = xsk->ctx;
-	__u32 prog_id = 0;
-	int err;
-
-	if (ctx->has_bpf_link)
-		err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
-	else
-		err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
-
-	if (err)
-		return err;
-
-	err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
-			 xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
-
-	if (!err && xsks_map_fd)
-		*xsks_map_fd = ctx->xsks_map_fd;
-
-	return err;
-}
-
-int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
-{
-	return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
-}
-
 int xsk_load_xdp_program(int *xsk_map_fd, int *prog_fd)
 {
 	*xsk_map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", sizeof(int), sizeof(int),
@@ -988,51 +423,13 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	ctx->refcount = 1;
 	ctx->umem = umem;
 	ctx->queue_id = queue_id;
-	ctx->prog_fd = FD_NOT_USED;
-	ctx->link_fd = FD_NOT_USED;
-	ctx->xsks_map_fd = FD_NOT_USED;
 
 	ctx->fill = fill;
 	ctx->comp = comp;
 	list_add(&ctx->list, &umem->ctx_list);
-	ctx->has_bpf_link = xsk_probe_bpf_link();
 	return ctx;
 }
 
-static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
-{
-	free(xsk->ctx);
-	free(xsk);
-}
-
-int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
-{
-	xsk->ctx->xsks_map_fd = fd;
-	return xsk_set_bpf_maps(xsk);
-}
-
-int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
-{
-	struct xsk_socket *xsk;
-	int res;
-
-	xsk = calloc(1, sizeof(*xsk));
-	if (!xsk)
-		return -ENOMEM;
-
-	res = xsk_create_xsk_struct(ifindex, xsk);
-	if (res) {
-		free(xsk);
-		return -EINVAL;
-	}
-
-	res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);
-
-	xsk_destroy_xsk_struct(xsk);
-
-	return res;
-}
-
 int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			      int ifindex,
 			      __u32 queue_id, struct xsk_umem *umem,
@@ -1255,14 +652,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	ctx = xsk->ctx;
 	umem = ctx->umem;
 
-	if (ctx->refcount == 1) {
-		xsk_delete_bpf_maps(xsk);
-		if (ctx->prog_fd != FD_NOT_USED)
-			close(ctx->prog_fd);
-		if (ctx->has_bpf_link && ctx->link_fd != FD_NOT_USED)
-			close(ctx->link_fd);
-	}
-
 	xsk_put_ctx(ctx, true);
 
 	err = xsk_get_mmap_offsets(xsk->fd, &off);
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 7a5aeacd261b..bd5b55ad9f8a 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -197,21 +197,12 @@ struct xsk_umem_config {
 	__u32 flags;
 };
 
-int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd);
-int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
-int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
-
-/* Flags for the libbpf_flags field. */
-#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
-
 int xsk_load_xdp_program(int *xsk_map_fd, int *prog_fd);
 int xsk_attach_xdp_program(int ifindex, int prog_fd, u32 xdp_flags);
 
 struct xsk_socket_config {
 	__u32 rx_size;
 	__u32 tx_size;
-	__u32 libbpf_flags;
-	__u32 xdp_flags;
 	__u16 bind_flags;
 };
 
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 0c0974c209cd..693f8a63f718 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -102,12 +102,6 @@
 #include <linux/filter.h>
 #include "../kselftest.h"
 
-/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
- * Until xskxceiver is either moved or re-writed into libxdp, suppress
- * deprecation warnings in this file
- */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
 static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
 static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
 static const char *IP1 = "192.168.100.162";
@@ -326,8 +320,6 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
 	xsk->umem = umem;
 	cfg.rx_size = xsk->rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-	cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
-	cfg.xdp_flags = ifobject->xdp_flags;
 	cfg.bind_flags = ifobject->bind_flags;
 	if (shared)
 		cfg.bind_flags |= XDP_SHARED_UMEM;
-- 
cgit v1.2.3


From f0a249df1b071d6f7177cc615d688a3a5d48423a Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:22 +0100
Subject: selftests/xsk: get rid of built-in XDP program

Get rid of the built-in XDP program that was part of the old libbpf
code in xsk.c and replace it with an eBPF program build using the
framework by all the other bpf selftests. This will form the base for
adding more programs in later commits.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-12-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile              |  6 +-
 tools/testing/selftests/bpf/progs/xsk_xdp_progs.c | 19 +++++
 tools/testing/selftests/bpf/xsk.c                 | 88 +++++------------------
 tools/testing/selftests/bpf/xsk.h                 |  6 +-
 tools/testing/selftests/bpf/xskxceiver.c          | 72 +++++++++++--------
 tools/testing/selftests/bpf/xskxceiver.h          |  7 +-
 6 files changed, 92 insertions(+), 106 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/xsk_xdp_progs.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 205e8c3c346a..22533a18705e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -240,7 +240,6 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
 $(OUTPUT)/test_maps: $(TESTING_HELPERS)
 $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS)
 $(OUTPUT)/xsk.o: $(BPFOBJ)
-$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o
 
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
@@ -383,6 +382,7 @@ linked_maps.skel.h-deps := linked_maps1.bpf.o linked_maps2.bpf.o
 test_subskeleton.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o test_subskeleton.bpf.o
 test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o
 test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
+xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
 
 LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
@@ -576,6 +576,10 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
+$(OUTPUT)/xskxceiver: xskxceiver.c $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+	$(call msg,BINARY,,$@)
+	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
 # Make sure we are able to include and link libbpf against c++.
 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
new file mode 100644
index 000000000000..698176882ac6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Intel */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_XSKMAP);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} xsk SEC(".maps");
+
+SEC("xdp") int xsk_def_prog(struct xdp_md *xdp)
+{
+	return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index 9ed31d280e48..dc6b47280ec4 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -267,87 +267,37 @@ out_umem_alloc:
 	return err;
 }
 
-static int __xsk_load_xdp_prog(int xsk_map_fd)
+int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
 {
-	static const int log_buf_size = 16 * 1024;
-	char log_buf[log_buf_size];
 	int prog_fd;
 
-	/* This is the post-5.3 kernel C-program:
-	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
-	 * {
-	 *     return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
-	 * }
-	 */
-	struct bpf_insn prog[] = {
-		/* r2 = *(u32 *)(r1 + 16) */
-		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
-		/* r1 = xskmap[] */
-		BPF_LD_MAP_FD(BPF_REG_1, xsk_map_fd),
-		/* r3 = XDP_PASS */
-		BPF_MOV64_IMM(BPF_REG_3, 2),
-		/* call bpf_redirect_map */
-		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = ARRAY_SIZE(prog);
-	LIBBPF_OPTS(bpf_prog_load_opts, opts,
-		.log_buf = log_buf,
-		.log_size = log_buf_size,
-	);
-
-	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause",
-				prog, insns_cnt, &opts);
-	if (prog_fd < 0)
-		pr_warn("BPF log buffer:\n%s", log_buf);
-
-	return prog_fd;
+	prog_fd = bpf_program__fd(prog);
+	return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
 }
 
-int xsk_attach_xdp_program(int ifindex, int prog_fd, u32 xdp_flags)
+void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
 {
-	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
-	__u32 prog_id = 0;
-	int link_fd;
-	int err;
-
-	err = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id);
-	if (err) {
-		pr_warn("getting XDP prog id failed\n");
-		return err;
-	}
-
-	/* If there's a netlink-based XDP prog loaded on interface, bail out
-	 * and ask user to do the removal by himself
-	 */
-	if (prog_id) {
-		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
-		return -EINVAL;
-	}
-
-	opts.flags = xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
+	bpf_xdp_detach(ifindex, xdp_flags, NULL);
+}
 
-	link_fd = bpf_link_create(prog_fd, ifindex, BPF_XDP, &opts);
-	if (link_fd < 0)
-		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
+void xsk_clear_xskmap(struct bpf_map *map)
+{
+	u32 index = 0;
+	int map_fd;
 
-	return link_fd;
+	map_fd = bpf_map__fd(map);
+	bpf_map_delete_elem(map_fd, &index);
 }
 
-int xsk_load_xdp_program(int *xsk_map_fd, int *prog_fd)
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk)
 {
-	*xsk_map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", sizeof(int), sizeof(int),
-				     XSKMAP_SIZE, NULL);
-	if (*xsk_map_fd < 0)
-		return *xsk_map_fd;
-
-	*prog_fd = __xsk_load_xdp_prog(*xsk_map_fd);
-	if (*prog_fd < 0) {
-		close(*xsk_map_fd);
-		return *prog_fd;
-	}
+	int map_fd, sock_fd;
+	u32 index = 0;
 
-	return 0;
+	map_fd = bpf_map__fd(map);
+	sock_fd = xsk_socket__fd(xsk);
+
+	return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
 }
 
 static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index bd5b55ad9f8a..5624d31b8db7 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -197,8 +197,10 @@ struct xsk_umem_config {
 	__u32 flags;
 };
 
-int xsk_load_xdp_program(int *xsk_map_fd, int *prog_fd);
-int xsk_attach_xdp_program(int ifindex, int prog_fd, u32 xdp_flags);
+int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags);
+void xsk_detach_xdp_program(int ifindex, u32 xdp_flags);
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk);
+void xsk_clear_xskmap(struct bpf_map *map);
 
 struct xsk_socket_config {
 	__u32 rx_size;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 693f8a63f718..d69100267f70 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1207,7 +1207,7 @@ static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobje
 {
 	xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true);
 	ifobject->xsk = &ifobject->xsk_arr[0];
-	ifobject->xsk_map_fd = test->ifobj_rx->xsk_map_fd;
+	ifobject->xskmap = test->ifobj_rx->xskmap;
 	memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
 }
 
@@ -1247,9 +1247,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
-	u32 queue_id = 0;
-	int ret, fd;
 	void *bufs;
+	int ret;
 
 	if (ifobject->umem->unaligned_mode)
 		mmap_flags |= MAP_HUGETLB;
@@ -1274,8 +1273,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	if (!ifobject->rx_on)
 		return;
 
-	fd = xsk_socket__fd(ifobject->xsk->xsk);
-	ret = bpf_map_update_elem(ifobject->xsk_map_fd, &queue_id, &fd, 0);
+	ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
 	if (ret)
 		exit_with_error(errno);
 }
@@ -1309,18 +1307,17 @@ static void *worker_testapp_validate_rx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_rx;
-	int id = 0, err, fd = xsk_socket__fd(ifobject->xsk->xsk);
 	struct pollfd fds = { };
-	u32 queue_id = 0;
+	int err;
 
 	if (test->current_step == 1) {
 		thread_common_ops(test, ifobject);
 	} else {
-		bpf_map_delete_elem(ifobject->xsk_map_fd, &id);
-		err = bpf_map_update_elem(ifobject->xsk_map_fd, &queue_id, &fd, 0);
+		xsk_clear_xskmap(ifobject->xskmap);
+		err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk);
 		if (err) {
-			printf("Error: Failed to update xskmap, error %s\n", strerror(err));
-			exit_with_error(err);
+			printf("Error: Failed to update xskmap, error %s\n", strerror(-err));
+			exit_with_error(-err);
 		}
 	}
 
@@ -1390,10 +1387,8 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct
 	pthread_join(t0, NULL);
 
 	if (test->total_steps == test->current_step || test->fail) {
-		u32 queue_id = 0;
-
 		xsk_socket__delete(ifobj->xsk->xsk);
-		bpf_map_delete_elem(ifobj->xsk_map_fd, &queue_id);
+		xsk_clear_xskmap(ifobj->xskmap);
 		testapp_clean_xsk_umem(ifobj);
 	}
 
@@ -1482,14 +1477,14 @@ static void testapp_bidi(struct test_spec *test)
 
 static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
 {
-	int ret, queue_id = 0, fd = xsk_socket__fd(ifobj_rx->xsk->xsk);
+	int ret;
 
 	xsk_socket__delete(ifobj_tx->xsk->xsk);
 	xsk_socket__delete(ifobj_rx->xsk->xsk);
 	ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
 	ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
 
-	ret = bpf_map_update_elem(ifobj_rx->xsk_map_fd, &queue_id, &fd, 0);
+	ret = xsk_update_xskmap(ifobj_rx->xskmap, ifobj_rx->xsk->xsk);
 	if (ret)
 		exit_with_error(errno);
 }
@@ -1651,12 +1646,26 @@ static void testapp_invalid_desc(struct test_spec *test)
 	pkt_stream_restore_default(test);
 }
 
+static int xsk_load_xdp_programs(struct ifobject *ifobj)
+{
+	ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
+	if (libbpf_get_error(ifobj->xdp_progs))
+		return libbpf_get_error(ifobj->xdp_progs);
+
+	return 0;
+}
+
+static void xsk_unload_xdp_programs(struct ifobject *ifobj)
+{
+	xsk_xdp_progs__destroy(ifobj->xdp_progs);
+}
+
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
 		       const char *dst_ip, const char *src_ip, const u16 dst_port,
 		       const u16 src_port, thread_func_t func_ptr, bool load_xdp)
 {
-	int xsk_map_fd, prog_fd, err;
 	struct in_addr ip;
+	int err;
 
 	memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
 	memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
@@ -1675,20 +1684,20 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 	if (!load_xdp)
 		return;
 
-	err = xsk_load_xdp_program(&xsk_map_fd, &prog_fd);
+	err = xsk_load_xdp_programs(ifobj);
 	if (err) {
 		printf("Error loading XDP program\n");
 		exit_with_error(err);
 	}
 
-	ifobj->xsk_map_fd = xsk_map_fd;
-	ifobj->prog_fd = prog_fd;
 	ifobj->xdp_flags = mode_to_xdp_flags(TEST_MODE_SKB);
-	ifobj->link_fd = xsk_attach_xdp_program(ifobj->ifindex, prog_fd, ifobj->xdp_flags);
-	if (ifobj->link_fd < 0) {
+	err = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_def_prog, ifobj->ifindex,
+				     ifobj->xdp_flags);
+	if (err) {
 		printf("Error attaching XDP program\n");
-		exit_with_error(ifobj->link_fd);
+		exit_with_error(-err);
 	}
+	ifobj->xskmap = ifobj->xdp_progs->maps.xsk;
 }
 
 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
@@ -1823,9 +1832,6 @@ out_xsk_arr:
 
 static void ifobject_delete(struct ifobject *ifobj)
 {
-	close(ifobj->prog_fd);
-	close(ifobj->xsk_map_fd);
-
 	free(ifobj->umem);
 	free(ifobj->xsk_arr);
 	free(ifobj);
@@ -1864,13 +1870,15 @@ static void change_to_drv_mode(struct ifobject *ifobj)
 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
 	int ret;
 
-	close(ifobj->link_fd);
-	ifobj->link_fd = xsk_attach_xdp_program(ifobj->ifindex, ifobj->prog_fd,
-						XDP_FLAGS_DRV_MODE);
-	if (ifobj->link_fd < 0) {
+	xsk_detach_xdp_program(ifobj->ifindex, ifobj->xdp_flags);
+	ifobj->xdp_flags = XDP_FLAGS_DRV_MODE;
+	ret = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_def_prog, ifobj->ifindex,
+				     ifobj->xdp_flags);
+	if (ret) {
 		ksft_print_msg("Error attaching XDP program\n");
-		exit_with_error(-ifobj->link_fd);
+		exit_with_error(-ret);
 	}
+	ifobj->xskmap = ifobj->xdp_progs->maps.xsk;
 
 	ret = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &opts);
 	if (ret)
@@ -1955,6 +1963,8 @@ int main(int argc, char **argv)
 
 	pkt_stream_delete(tx_pkt_stream_default);
 	pkt_stream_delete(rx_pkt_stream_default);
+	xsk_unload_xdp_programs(ifobj_tx);
+	xsk_unload_xdp_programs(ifobj_rx);
 	ifobject_delete(ifobj_tx);
 	ifobject_delete(ifobj_rx);
 
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index b2ba877b1966..70b3e5d1d40c 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -5,6 +5,8 @@
 #ifndef XSKXCEIVER_H_
 #define XSKXCEIVER_H_
 
+#include "xsk_xdp_progs.skel.h"
+
 #ifndef SOL_XDP
 #define SOL_XDP 283
 #endif
@@ -138,9 +140,8 @@ struct ifobject {
 	thread_func_t func_ptr;
 	validation_func_t validation_func;
 	struct pkt_stream *pkt_stream;
-	int xsk_map_fd;
-	int prog_fd;
-	int link_fd;
+	struct xsk_xdp_progs *xdp_progs;
+	struct bpf_map *xskmap;
 	int ifindex;
 	u32 dst_ip;
 	u32 src_ip;
-- 
cgit v1.2.3


From 80bea9acabb7690042f8efe25f208ffc67032aac Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:23 +0100
Subject: selftests/xsk: add test when some packets are XDP_DROPed

Add a new test where some of the packets are not passed to the AF_XDP
socket and instead get a XDP_DROP verdict. This is important as it
tests the recycling mechanism of the buffer pool. If a packet is not
sent to the AF_XDP socket, the buffer the packet resides in is instead
recycled so it can be used again without the round-trip to user
space. The test introduces a new XDP program that drops every other
packet.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-13-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/xsk_xdp_progs.c | 11 ++++++++
 tools/testing/selftests/bpf/xskxceiver.c          | 31 +++++++++++++++++++++++
 tools/testing/selftests/bpf/xskxceiver.h          |  1 +
 3 files changed, 43 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index 698176882ac6..744a01d0e57d 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -11,9 +11,20 @@ struct {
 	__uint(value_size, sizeof(int));
 } xsk SEC(".maps");
 
+static unsigned int idx;
+
 SEC("xdp") int xsk_def_prog(struct xdp_md *xdp)
 {
 	return bpf_redirect_map(&xsk, 0, XDP_DROP);
 }
 
+SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp)
+{
+	/* Drop every other packet */
+	if (idx++ % 2)
+		return XDP_DROP;
+
+	return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index d69100267f70..a33f11b4c598 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1646,6 +1646,34 @@ static void testapp_invalid_desc(struct test_spec *test)
 	pkt_stream_restore_default(test);
 }
 
+static void testapp_xdp_drop(struct test_spec *test)
+{
+	struct ifobject *ifobj = test->ifobj_rx;
+	int err;
+
+	test_spec_set_name(test, "XDP_DROP_HALF");
+	xsk_detach_xdp_program(ifobj->ifindex, ifobj->xdp_flags);
+	err = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_xdp_drop, ifobj->ifindex,
+				     ifobj->xdp_flags);
+	if (err) {
+		printf("Error attaching XDP_DROP program\n");
+		test->fail = true;
+		return;
+	}
+
+	pkt_stream_receive_half(test);
+	testapp_validate_traffic(test);
+
+	pkt_stream_restore_default(test);
+	xsk_detach_xdp_program(ifobj->ifindex, ifobj->xdp_flags);
+	err = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_def_prog, ifobj->ifindex,
+				     ifobj->xdp_flags);
+	if (err) {
+		printf("Error restoring default XDP program\n");
+		exit_with_error(-err);
+	}
+}
+
 static int xsk_load_xdp_programs(struct ifobject *ifobj)
 {
 	ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
@@ -1796,6 +1824,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 	case TEST_TYPE_HEADROOM:
 		testapp_headroom(test);
 		break;
+	case TEST_TYPE_XDP_DROP_HALF:
+		testapp_xdp_drop(test);
+		break;
 	default:
 		break;
 	}
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 70b3e5d1d40c..a4daa5134fc0 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -87,6 +87,7 @@ enum test_type {
 	TEST_TYPE_STATS_RX_FULL,
 	TEST_TYPE_STATS_FILL_EMPTY,
 	TEST_TYPE_BPF_RES,
+	TEST_TYPE_XDP_DROP_HALF,
 	TEST_TYPE_MAX
 };
 
-- 
cgit v1.2.3


From 7f881984073a717cf09f82882b7b8062da6773d7 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:24 +0100
Subject: selftests/xsk: merge dual and single thread dispatchers

Make the thread dispatching code common by unifying the dual and
single thread dispatcher code. This so we do not have to add code in
two places in upcoming commits.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-14-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xskxceiver.c | 120 ++++++++++++++-----------------
 1 file changed, 54 insertions(+), 66 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index a33f11b4c598..11e4f29d40f7 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1356,83 +1356,59 @@ static void handler(int signum)
 	pthread_exit(NULL);
 }
 
-static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj,
-						  enum test_type type)
+static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
+				      struct ifobject *ifobj2)
 {
-	bool old_shared_umem = ifobj->shared_umem;
-	pthread_t t0;
+	pthread_t t0, t1;
 
-	if (pthread_barrier_init(&barr, NULL, 2))
-		exit_with_error(errno);
+	if (ifobj2)
+		if (pthread_barrier_init(&barr, NULL, 2))
+			exit_with_error(errno);
 
 	test->current_step++;
-	if (type == TEST_TYPE_POLL_RXQ_TMOUT)
-		pkt_stream_reset(ifobj->pkt_stream);
+	pkt_stream_reset(ifobj1->pkt_stream);
 	pkts_in_flight = 0;
 
-	test->ifobj_rx->shared_umem = false;
-	test->ifobj_tx->shared_umem = false;
-
 	signal(SIGUSR1, handler);
-	/* Spawn thread */
-	pthread_create(&t0, NULL, ifobj->func_ptr, test);
+	/*Spawn RX thread */
+	pthread_create(&t0, NULL, ifobj1->func_ptr, test);
 
-	if (type != TEST_TYPE_POLL_TXQ_TMOUT)
+	if (ifobj2) {
 		pthread_barrier_wait(&barr);
+		if (pthread_barrier_destroy(&barr))
+			exit_with_error(errno);
 
-	if (pthread_barrier_destroy(&barr))
-		exit_with_error(errno);
+		/*Spawn TX thread */
+		pthread_create(&t1, NULL, ifobj2->func_ptr, test);
 
-	pthread_kill(t0, SIGUSR1);
-	pthread_join(t0, NULL);
+		pthread_join(t1, NULL);
+	}
+
+	if (!ifobj2)
+		pthread_kill(t0, SIGUSR1);
+	else
+		pthread_join(t0, NULL);
 
 	if (test->total_steps == test->current_step || test->fail) {
-		xsk_socket__delete(ifobj->xsk->xsk);
-		xsk_clear_xskmap(ifobj->xskmap);
-		testapp_clean_xsk_umem(ifobj);
+		if (ifobj2)
+			xsk_socket__delete(ifobj2->xsk->xsk);
+		xsk_socket__delete(ifobj1->xsk->xsk);
+		testapp_clean_xsk_umem(ifobj1);
+		if (ifobj2 && !ifobj2->shared_umem)
+			testapp_clean_xsk_umem(ifobj2);
 	}
 
-	test->ifobj_rx->shared_umem = old_shared_umem;
-	test->ifobj_tx->shared_umem = old_shared_umem;
-
 	return !!test->fail;
 }
 
 static int testapp_validate_traffic(struct test_spec *test)
 {
-	struct ifobject *ifobj_tx = test->ifobj_tx;
-	struct ifobject *ifobj_rx = test->ifobj_rx;
-	pthread_t t0, t1;
-
-	if (pthread_barrier_init(&barr, NULL, 2))
-		exit_with_error(errno);
-
-	test->current_step++;
-	pkt_stream_reset(ifobj_rx->pkt_stream);
-	pkts_in_flight = 0;
-
-	/*Spawn RX thread */
-	pthread_create(&t0, NULL, ifobj_rx->func_ptr, test);
-
-	pthread_barrier_wait(&barr);
-	if (pthread_barrier_destroy(&barr))
-		exit_with_error(errno);
-
-	/*Spawn TX thread */
-	pthread_create(&t1, NULL, ifobj_tx->func_ptr, test);
-
-	pthread_join(t1, NULL);
-	pthread_join(t0, NULL);
-
-	if (test->total_steps == test->current_step || test->fail) {
-		xsk_socket__delete(ifobj_tx->xsk->xsk);
-		xsk_socket__delete(ifobj_rx->xsk->xsk);
-		testapp_clean_xsk_umem(ifobj_rx);
-		if (!ifobj_tx->shared_umem)
-			testapp_clean_xsk_umem(ifobj_tx);
-	}
+	return __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+}
 
-	return !!test->fail;
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
+{
+	return __testapp_validate_traffic(test, ifobj, NULL);
 }
 
 static void testapp_teardown(struct test_spec *test)
@@ -1674,6 +1650,26 @@ static void testapp_xdp_drop(struct test_spec *test)
 	}
 }
 
+static void testapp_poll_txq_tmout(struct test_spec *test)
+{
+	test_spec_set_name(test, "POLL_TXQ_FULL");
+
+	test->ifobj_tx->use_poll = true;
+	/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
+	test->ifobj_tx->umem->frame_size = 2048;
+	pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
+	testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+
+	pkt_stream_restore_default(test);
+}
+
+static void testapp_poll_rxq_tmout(struct test_spec *test)
+{
+	test_spec_set_name(test, "POLL_RXQ_EMPTY");
+	test->ifobj_rx->use_poll = true;
+	testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+}
+
 static int xsk_load_xdp_programs(struct ifobject *ifobj)
 {
 	ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
@@ -1784,18 +1780,10 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		testapp_validate_traffic(test);
 		break;
 	case TEST_TYPE_POLL_TXQ_TMOUT:
-		test_spec_set_name(test, "POLL_TXQ_FULL");
-		test->ifobj_tx->use_poll = true;
-		/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
-		test->ifobj_tx->umem->frame_size = 2048;
-		pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
-		testapp_validate_traffic_single_thread(test, test->ifobj_tx, type);
-		pkt_stream_restore_default(test);
+		testapp_poll_txq_tmout(test);
 		break;
 	case TEST_TYPE_POLL_RXQ_TMOUT:
-		test_spec_set_name(test, "POLL_RXQ_EMPTY");
-		test->ifobj_rx->use_poll = true;
-		testapp_validate_traffic_single_thread(test, test->ifobj_rx, type);
+		testapp_poll_rxq_tmout(test);
 		break;
 	case TEST_TYPE_ALIGNED_INV_DESC:
 		test_spec_set_name(test, "ALIGNED_INV_DESC");
-- 
cgit v1.2.3


From e67b2554f301b3e12322230ebb81ac7b7892f1a4 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:25 +0100
Subject: selftests/xsk: automatically restore packet stream

Automatically restore the default packet stream if needed at the end
of each test. This so that test writers do not forget to do this.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-15-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xskxceiver.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 11e4f29d40f7..66863504c76a 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1501,8 +1501,6 @@ static void testapp_stats_tx_invalid_descs(struct test_spec *test)
 	pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
 	test->ifobj_tx->validation_func = validate_tx_invalid_descs;
 	testapp_validate_traffic(test);
-
-	pkt_stream_restore_default(test);
 }
 
 static void testapp_stats_rx_full(struct test_spec *test)
@@ -1518,8 +1516,6 @@ static void testapp_stats_rx_full(struct test_spec *test)
 	test->ifobj_rx->release_rx = false;
 	test->ifobj_rx->validation_func = validate_rx_full;
 	testapp_validate_traffic(test);
-
-	pkt_stream_restore_default(test);
 }
 
 static void testapp_stats_fill_empty(struct test_spec *test)
@@ -1534,8 +1530,6 @@ static void testapp_stats_fill_empty(struct test_spec *test)
 	test->ifobj_rx->use_fill_ring = false;
 	test->ifobj_rx->validation_func = validate_fill_empty;
 	testapp_validate_traffic(test);
-
-	pkt_stream_restore_default(test);
 }
 
 /* Simple test */
@@ -1568,7 +1562,6 @@ static bool testapp_unaligned(struct test_spec *test)
 	test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
 	testapp_validate_traffic(test);
 
-	pkt_stream_restore_default(test);
 	return true;
 }
 
@@ -1578,7 +1571,6 @@ static void testapp_single_pkt(struct test_spec *test)
 
 	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
 	testapp_validate_traffic(test);
-	pkt_stream_restore_default(test);
 }
 
 static void testapp_invalid_desc(struct test_spec *test)
@@ -1619,7 +1611,6 @@ static void testapp_invalid_desc(struct test_spec *test)
 
 	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
 	testapp_validate_traffic(test);
-	pkt_stream_restore_default(test);
 }
 
 static void testapp_xdp_drop(struct test_spec *test)
@@ -1640,7 +1631,6 @@ static void testapp_xdp_drop(struct test_spec *test)
 	pkt_stream_receive_half(test);
 	testapp_validate_traffic(test);
 
-	pkt_stream_restore_default(test);
 	xsk_detach_xdp_program(ifobj->ifindex, ifobj->xdp_flags);
 	err = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_def_prog, ifobj->ifindex,
 				     ifobj->xdp_flags);
@@ -1659,8 +1649,6 @@ static void testapp_poll_txq_tmout(struct test_spec *test)
 	test->ifobj_tx->umem->frame_size = 2048;
 	pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
 	testapp_validate_traffic_single_thread(test, test->ifobj_tx);
-
-	pkt_stream_restore_default(test);
 }
 
 static void testapp_poll_rxq_tmout(struct test_spec *test)
@@ -1766,8 +1754,6 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		test->ifobj_rx->umem->frame_size = 2048;
 		pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE);
 		testapp_validate_traffic(test);
-
-		pkt_stream_restore_default(test);
 		break;
 	case TEST_TYPE_RX_POLL:
 		test->ifobj_rx->use_poll = true;
@@ -1822,6 +1808,7 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 	if (!test->fail)
 		ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
 				      test->name);
+	pkt_stream_restore_default(test);
 }
 
 static struct ifobject *ifobject_create(void)
-- 
cgit v1.2.3


From 7d8319a7cc668607aa054ea2da0bb730538a510b Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 11 Jan 2023 10:35:26 +0100
Subject: selftests/xsk: automatically switch XDP programs

Implement automatic switching of XDP programs and execution modes if
needed by a test. This makes it much simpler to write a test as it
only has to say what XDP program it needs if it is not the default
one. This also makes it possible to remove the initial explicit
attachment of the XDP program as well as the explicit mode switch in
the code. These are now handled by the same code that just checks if a
switch is necessary, so no special cases are needed.

The default XDP program for all tests is one that sends all packets to
the AF_XDP socket. If you need another one, please use the new
function test_spec_set_xdp_prog() to specify what XDP programs and
maps to use for this test.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20230111093526.11682-16-magnus.karlsson@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xsk.c        |  19 +++++
 tools/testing/selftests/bpf/xsk.h        |   1 +
 tools/testing/selftests/bpf/xskxceiver.c | 137 ++++++++++++++++---------------
 tools/testing/selftests/bpf/xskxceiver.h |   7 +-
 4 files changed, 96 insertions(+), 68 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index dc6b47280ec4..687d83e707f8 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -267,6 +267,25 @@ out_umem_alloc:
 	return err;
 }
 
+bool xsk_is_in_mode(u32 ifindex, int mode)
+{
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+	int ret;
+
+	ret = bpf_xdp_query(ifindex, mode, &opts);
+	if (ret) {
+		printf("XDP mode query returned error %s\n", strerror(errno));
+		return false;
+	}
+
+	if (mode == XDP_FLAGS_DRV_MODE)
+		return opts.attach_mode == XDP_ATTACHED_DRV;
+	else if (mode == XDP_FLAGS_SKB_MODE)
+		return opts.attach_mode == XDP_ATTACHED_SKB;
+
+	return false;
+}
+
 int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
 {
 	int prog_fd;
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 5624d31b8db7..04ed8b544712 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -201,6 +201,7 @@ int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
 void xsk_detach_xdp_program(int ifindex, u32 xdp_flags);
 int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk);
 void xsk_clear_xskmap(struct bpf_map *map);
+bool xsk_is_in_mode(u32 ifindex, int mode);
 
 struct xsk_socket_config {
 	__u32 rx_size;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 66863504c76a..a17655107a94 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -96,6 +96,8 @@
 #include <time.h>
 #include <unistd.h>
 #include <stdatomic.h>
+
+#include "xsk_xdp_progs.skel.h"
 #include "xsk.h"
 #include "xskxceiver.h"
 #include <bpf/bpf.h>
@@ -356,7 +358,6 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
 	xsk = calloc(1, sizeof(struct xsk_socket_info));
 	if (!xsk)
 		goto out;
-	ifobject->xdp_flags = XDP_FLAGS_DRV_MODE;
 	ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
 	ifobject->rx_on = true;
 	xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
@@ -493,6 +494,10 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 	test->total_steps = 1;
 	test->nb_sockets = 1;
 	test->fail = false;
+	test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
+	test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
+	test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
+	test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
 }
 
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
@@ -532,6 +537,16 @@ static void test_spec_set_name(struct test_spec *test, const char *name)
 	strncpy(test->name, name, MAX_TEST_NAME_SIZE);
 }
 
+static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
+				   struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
+				   struct bpf_map *xskmap_tx)
+{
+	test->xdp_prog_rx = xdp_prog_rx;
+	test->xdp_prog_tx = xdp_prog_tx;
+	test->xskmap_rx = xskmap_rx;
+	test->xskmap_tx = xskmap_tx;
+}
+
 static void pkt_stream_reset(struct pkt_stream *pkt_stream)
 {
 	if (pkt_stream)
@@ -1356,6 +1371,47 @@ static void handler(int signum)
 	pthread_exit(NULL);
 }
 
+static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj)
+{
+	return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
+}
+
+static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
+			     struct bpf_map *xskmap, enum test_mode mode)
+{
+	int err;
+
+	xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
+	err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
+	if (err) {
+		printf("Error attaching XDP program\n");
+		exit_with_error(-err);
+	}
+
+	if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
+		if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
+			ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+			exit_with_error(EINVAL);
+		}
+
+	ifobj->xdp_prog = xdp_prog;
+	ifobj->xskmap = xskmap;
+	ifobj->mode = mode;
+}
+
+static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
+				 struct ifobject *ifobj_tx)
+{
+	if (xdp_prog_changed(test, ifobj_rx))
+		xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
+
+	if (!ifobj_tx || ifobj_tx->shared_umem)
+		return;
+
+	if (xdp_prog_changed(test, ifobj_tx))
+		xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
+}
+
 static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
 				      struct ifobject *ifobj2)
 {
@@ -1403,7 +1459,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i
 
 static int testapp_validate_traffic(struct test_spec *test)
 {
-	return __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+	struct ifobject *ifobj_rx = test->ifobj_rx;
+	struct ifobject *ifobj_tx = test->ifobj_tx;
+
+	xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
+	return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
 }
 
 static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
@@ -1446,7 +1506,7 @@ static void testapp_bidi(struct test_spec *test)
 
 	print_verbose("Switching Tx/Rx vectors\n");
 	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-	testapp_validate_traffic(test);
+	__testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
 
 	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
 }
@@ -1615,29 +1675,15 @@ static void testapp_invalid_desc(struct test_spec *test)
 
 static void testapp_xdp_drop(struct test_spec *test)
 {
-	struct ifobject *ifobj = test->ifobj_rx;
-	int err;
+	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
 
 	test_spec_set_name(test, "XDP_DROP_HALF");
-	xsk_detach_xdp_program(ifobj->ifindex, ifobj->xdp_flags);
-	err = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_xdp_drop, ifobj->ifindex,
-				     ifobj->xdp_flags);
-	if (err) {
-		printf("Error attaching XDP_DROP program\n");
-		test->fail = true;
-		return;
-	}
+	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
+			       skel_rx->maps.xsk, skel_tx->maps.xsk);
 
 	pkt_stream_receive_half(test);
 	testapp_validate_traffic(test);
-
-	xsk_detach_xdp_program(ifobj->ifindex, ifobj->xdp_flags);
-	err = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_def_prog, ifobj->ifindex,
-				     ifobj->xdp_flags);
-	if (err) {
-		printf("Error restoring default XDP program\n");
-		exit_with_error(-err);
-	}
 }
 
 static void testapp_poll_txq_tmout(struct test_spec *test)
@@ -1674,7 +1720,7 @@ static void xsk_unload_xdp_programs(struct ifobject *ifobj)
 
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
 		       const char *dst_ip, const char *src_ip, const u16 dst_port,
-		       const u16 src_port, thread_func_t func_ptr, bool load_xdp)
+		       const u16 src_port, thread_func_t func_ptr)
 {
 	struct in_addr ip;
 	int err;
@@ -1693,23 +1739,11 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 
 	ifobj->func_ptr = func_ptr;
 
-	if (!load_xdp)
-		return;
-
 	err = xsk_load_xdp_programs(ifobj);
 	if (err) {
 		printf("Error loading XDP program\n");
 		exit_with_error(err);
 	}
-
-	ifobj->xdp_flags = mode_to_xdp_flags(TEST_MODE_SKB);
-	err = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_def_prog, ifobj->ifindex,
-				     ifobj->xdp_flags);
-	if (err) {
-		printf("Error attaching XDP program\n");
-		exit_with_error(-err);
-	}
-	ifobj->xskmap = ifobj->xdp_progs->maps.xsk;
 }
 
 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
@@ -1871,31 +1905,6 @@ static bool is_xdp_supported(int ifindex)
 	return true;
 }
 
-static void change_to_drv_mode(struct ifobject *ifobj)
-{
-	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
-	int ret;
-
-	xsk_detach_xdp_program(ifobj->ifindex, ifobj->xdp_flags);
-	ifobj->xdp_flags = XDP_FLAGS_DRV_MODE;
-	ret = xsk_attach_xdp_program(ifobj->xdp_progs->progs.xsk_def_prog, ifobj->ifindex,
-				     ifobj->xdp_flags);
-	if (ret) {
-		ksft_print_msg("Error attaching XDP program\n");
-		exit_with_error(-ret);
-	}
-	ifobj->xskmap = ifobj->xdp_progs->maps.xsk;
-
-	ret = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &opts);
-	if (ret)
-		exit_with_error(errno);
-
-	if (opts.attach_mode != XDP_ATTACHED_DRV) {
-		ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
-		exit_with_error(EINVAL);
-	}
-}
-
 int main(int argc, char **argv)
 {
 	struct pkt_stream *rx_pkt_stream_default;
@@ -1936,9 +1945,9 @@ int main(int argc, char **argv)
 	}
 
 	init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
-		   worker_testapp_validate_rx, true);
+		   worker_testapp_validate_rx);
 	init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
-		   worker_testapp_validate_tx, !shared_netdev);
+		   worker_testapp_validate_tx);
 
 	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
 	tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
@@ -1951,12 +1960,6 @@ int main(int argc, char **argv)
 	ksft_set_plan(modes * TEST_TYPE_MAX);
 
 	for (i = 0; i < modes; i++) {
-		if (i == TEST_MODE_DRV) {
-			change_to_drv_mode(ifobj_rx);
-			if (!shared_netdev)
-				change_to_drv_mode(ifobj_tx);
-		}
-
 		for (j = 0; j < TEST_TYPE_MAX; j++) {
 			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
 			run_pkt_test(&test, i, j);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index a4daa5134fc0..3e8ec7d8ec32 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -143,10 +143,11 @@ struct ifobject {
 	struct pkt_stream *pkt_stream;
 	struct xsk_xdp_progs *xdp_progs;
 	struct bpf_map *xskmap;
+	struct bpf_program *xdp_prog;
+	enum test_mode mode;
 	int ifindex;
 	u32 dst_ip;
 	u32 src_ip;
-	u32 xdp_flags;
 	u32 bind_flags;
 	u16 src_port;
 	u16 dst_port;
@@ -166,6 +167,10 @@ struct test_spec {
 	struct ifobject *ifobj_rx;
 	struct pkt_stream *tx_pkt_stream_default;
 	struct pkt_stream *rx_pkt_stream_default;
+	struct bpf_program *xdp_prog_rx;
+	struct bpf_program *xdp_prog_tx;
+	struct bpf_map *xskmap_rx;
+	struct bpf_map *xskmap_tx;
 	u16 total_steps;
 	u16 current_step;
 	u16 nb_sockets;
-- 
cgit v1.2.3


From 5c338112e48ab1937f99e4a8990a82c999fda126 Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Date: Tue, 10 Jan 2023 10:15:15 +0000
Subject: test/vsock: rework message bounds test

This updates message bound test making it more complex. Instead of
sending 1 bytes messages with one MSG_EOR bit, it sends messages of
random length(one half of messages are smaller than page size, second
half are bigger) with random number of MSG_EOR bits set. Receiver
also don't know total number of messages.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/vsock/control.c    |  28 +++++++++
 tools/testing/vsock/control.h    |   2 +
 tools/testing/vsock/util.c       |  13 ++++
 tools/testing/vsock/util.h       |   1 +
 tools/testing/vsock/vsock_test.c | 128 ++++++++++++++++++++++++++++++++++-----
 5 files changed, 157 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c
index 4874872fc5a3..d2deb4b15b94 100644
--- a/tools/testing/vsock/control.c
+++ b/tools/testing/vsock/control.c
@@ -141,6 +141,34 @@ void control_writeln(const char *str)
 	timeout_end();
 }
 
+void control_writeulong(unsigned long value)
+{
+	char str[32];
+
+	if (snprintf(str, sizeof(str), "%lu", value) >= sizeof(str)) {
+		perror("snprintf");
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln(str);
+}
+
+unsigned long control_readulong(void)
+{
+	unsigned long value;
+	char *str;
+
+	str = control_readln();
+
+	if (!str)
+		exit(EXIT_FAILURE);
+
+	value = strtoul(str, NULL, 10);
+	free(str);
+
+	return value;
+}
+
 /* Return the next line from the control socket (without the trailing newline).
  *
  * The program terminates if a timeout occurs.
diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h
index 51814b4f9ac1..c1f77fdb2c7a 100644
--- a/tools/testing/vsock/control.h
+++ b/tools/testing/vsock/control.h
@@ -9,7 +9,9 @@ void control_init(const char *control_host, const char *control_port,
 void control_cleanup(void);
 void control_writeln(const char *str);
 char *control_readln(void);
+unsigned long control_readulong(void);
 void control_expectln(const char *str);
 bool control_cmpln(char *line, const char *str, bool fail);
+void control_writeulong(unsigned long value);
 
 #endif /* CONTROL_H */
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 2acbb7703c6a..01b636d3039a 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -395,3 +395,16 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len,
 
 	test_cases[test_id].skip = true;
 }
+
+unsigned long hash_djb2(const void *data, size_t len)
+{
+	unsigned long hash = 5381;
+	int i = 0;
+
+	while (i < len) {
+		hash = ((hash << 5) + hash) + ((unsigned char *)data)[i];
+		i++;
+	}
+
+	return hash;
+}
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index a3375ad2fb7f..fb99208a95ea 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -49,4 +49,5 @@ void run_tests(const struct test_case *test_cases,
 void list_tests(const struct test_case *test_cases);
 void skip_test(struct test_case *test_cases, size_t test_cases_len,
 	       const char *test_id_str);
+unsigned long hash_djb2(const void *data, size_t len);
 #endif /* UTIL_H */
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index bb6d691cb30d..26c38ad9d07b 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -284,10 +284,14 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
 	close(fd);
 }
 
-#define MESSAGES_CNT 7
-#define MSG_EOR_IDX (MESSAGES_CNT / 2)
+#define SOCK_BUF_SIZE (2 * 1024 * 1024)
+#define MAX_MSG_SIZE (32 * 1024)
+
 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 {
+	unsigned long curr_hash;
+	int page_size;
+	int msg_count;
 	int fd;
 
 	fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
@@ -296,18 +300,79 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 		exit(EXIT_FAILURE);
 	}
 
-	/* Send several messages, one with MSG_EOR flag */
-	for (int i = 0; i < MESSAGES_CNT; i++)
-		send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0);
+	/* Wait, until receiver sets buffer size. */
+	control_expectln("SRVREADY");
+
+	curr_hash = 0;
+	page_size = getpagesize();
+	msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE;
+
+	for (int i = 0; i < msg_count; i++) {
+		ssize_t send_size;
+		size_t buf_size;
+		int flags;
+		void *buf;
+
+		/* Use "small" buffers and "big" buffers. */
+		if (i & 1)
+			buf_size = page_size +
+					(rand() % (MAX_MSG_SIZE - page_size));
+		else
+			buf_size = 1 + (rand() % page_size);
+
+		buf = malloc(buf_size);
+
+		if (!buf) {
+			perror("malloc");
+			exit(EXIT_FAILURE);
+		}
+
+		memset(buf, rand() & 0xff, buf_size);
+		/* Set at least one MSG_EOR + some random. */
+		if (i == (msg_count / 2) || (rand() & 1)) {
+			flags = MSG_EOR;
+			curr_hash++;
+		} else {
+			flags = 0;
+		}
+
+		send_size = send(fd, buf, buf_size, flags);
+
+		if (send_size < 0) {
+			perror("send");
+			exit(EXIT_FAILURE);
+		}
+
+		if (send_size != buf_size) {
+			fprintf(stderr, "Invalid send size\n");
+			exit(EXIT_FAILURE);
+		}
+
+		/*
+		 * Hash sum is computed at both client and server in
+		 * the same way:
+		 * H += hash('message data')
+		 * Such hash "controls" both data integrity and message
+		 * bounds. After data exchange, both sums are compared
+		 * using control socket, and if message bounds wasn't
+		 * broken - two values must be equal.
+		 */
+		curr_hash += hash_djb2(buf, buf_size);
+		free(buf);
+	}
 
 	control_writeln("SENDDONE");
+	control_writeulong(curr_hash);
 	close(fd);
 }
 
 static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
 {
+	unsigned long sock_buf_size;
+	unsigned long remote_hash;
+	unsigned long curr_hash;
 	int fd;
-	char buf[16];
+	char buf[MAX_MSG_SIZE];
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
 
@@ -317,25 +382,57 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
 		exit(EXIT_FAILURE);
 	}
 
+	sock_buf_size = SOCK_BUF_SIZE;
+
+	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+		       &sock_buf_size, sizeof(sock_buf_size))) {
+		perror("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+		exit(EXIT_FAILURE);
+	}
+
+	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+		       &sock_buf_size, sizeof(sock_buf_size))) {
+		perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Ready to receive data. */
+	control_writeln("SRVREADY");
+	/* Wait, until peer sends whole data. */
 	control_expectln("SENDDONE");
 	iov.iov_base = buf;
 	iov.iov_len = sizeof(buf);
 	msg.msg_iov = &iov;
 	msg.msg_iovlen = 1;
 
-	for (int i = 0; i < MESSAGES_CNT; i++) {
-		if (recvmsg(fd, &msg, 0) != 1) {
-			perror("message bound violated");
-			exit(EXIT_FAILURE);
-		}
+	curr_hash = 0;
 
-		if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) {
-			perror("MSG_EOR");
+	while (1) {
+		ssize_t recv_size;
+
+		recv_size = recvmsg(fd, &msg, 0);
+
+		if (!recv_size)
+			break;
+
+		if (recv_size < 0) {
+			perror("recvmsg");
 			exit(EXIT_FAILURE);
 		}
+
+		if (msg.msg_flags & MSG_EOR)
+			curr_hash++;
+
+		curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size);
 	}
 
 	close(fd);
+	remote_hash = control_readulong();
+
+	if (curr_hash != remote_hash) {
+		fprintf(stderr, "Message bounds broken\n");
+		exit(EXIT_FAILURE);
+	}
 }
 
 #define MESSAGE_TRUNC_SZ 32
@@ -427,7 +524,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts)
 	tv.tv_usec = 0;
 
 	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) {
-		perror("setsockopt 'SO_RCVTIMEO'");
+		perror("setsockopt(SO_RCVTIMEO)");
 		exit(EXIT_FAILURE);
 	}
 
@@ -644,7 +741,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
 
 	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
 		       &lowat_val, sizeof(lowat_val))) {
-		perror("setsockopt");
+		perror("setsockopt(SO_RCVLOWAT)");
 		exit(EXIT_FAILURE);
 	}
 
@@ -837,6 +934,7 @@ int main(int argc, char **argv)
 		.peer_cid = VMADDR_CID_ANY,
 	};
 
+	srand(time(NULL));
 	init_signals();
 
 	for (;;) {
-- 
cgit v1.2.3


From 685a21c314a8aec3549f25cb3f9d872e92245210 Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Date: Tue, 10 Jan 2023 10:17:02 +0000
Subject: test/vsock: add big message test

This adds test for sending message, bigger than peer's buffer size.
For SOCK_SEQPACKET socket it must fail, as this type of socket has
message size limit.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/vsock/vsock_test.c | 69 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 26c38ad9d07b..67e9f9df3a8c 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -569,6 +569,70 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts)
 	close(fd);
 }
 
+static void test_seqpacket_bigmsg_client(const struct test_opts *opts)
+{
+	unsigned long sock_buf_size;
+	ssize_t send_size;
+	socklen_t len;
+	void *data;
+	int fd;
+
+	len = sizeof(sock_buf_size);
+
+	fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+		       &sock_buf_size, &len)) {
+		perror("getsockopt");
+		exit(EXIT_FAILURE);
+	}
+
+	sock_buf_size++;
+
+	data = malloc(sock_buf_size);
+	if (!data) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	send_size = send(fd, data, sock_buf_size, 0);
+	if (send_size != -1) {
+		fprintf(stderr, "expected 'send(2)' failure, got %zi\n",
+			send_size);
+		exit(EXIT_FAILURE);
+	}
+
+	if (errno != EMSGSIZE) {
+		fprintf(stderr, "expected EMSGSIZE in 'errno', got %i\n",
+			errno);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("CLISENT");
+
+	free(data);
+	close(fd);
+}
+
+static void test_seqpacket_bigmsg_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("CLISENT");
+
+	close(fd);
+}
+
 #define BUF_PATTERN_1 'a'
 #define BUF_PATTERN_2 'b'
 
@@ -851,6 +915,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_poll_rcvlowat_client,
 		.run_server = test_stream_poll_rcvlowat_server,
 	},
+	{
+		.name = "SOCK_SEQPACKET big message",
+		.run_client = test_seqpacket_bigmsg_client,
+		.run_server = test_seqpacket_bigmsg_server,
+	},
 	{},
 };
 
-- 
cgit v1.2.3


From 8abbffd27cedd0f89f69e5ee2ff6841ea01511eb Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Date: Tue, 10 Jan 2023 10:18:32 +0000
Subject: test/vsock: vsock_perf utility

This adds utility to check vsock rx/tx performance.

Usage as sender:
./vsock_perf --sender <cid> --port <port> --bytes <bytes to send>
Usage as receiver:
./vsock_perf --port <port> --rcvlowat <SO_RCVLOWAT>

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/vsock/Makefile     |   3 +-
 tools/testing/vsock/README       |  34 ++++
 tools/testing/vsock/vsock_perf.c | 427 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 463 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/vsock/vsock_perf.c

(limited to 'tools/testing')

diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index f8293c6910c9..43a254f0e14d 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -1,8 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
-all: test
+all: test vsock_perf
 test: vsock_test vsock_diag_test
 vsock_test: vsock_test.o timeout.o control.o util.o
 vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
+vsock_perf: vsock_perf.o
 
 CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
 .PHONY: all test clean
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
index 4d5045e7d2c3..84ee217ba8ee 100644
--- a/tools/testing/vsock/README
+++ b/tools/testing/vsock/README
@@ -35,3 +35,37 @@ Invoke test binaries in both directions as follows:
                        --control-port=$GUEST_IP \
                        --control-port=1234 \
                        --peer-cid=3
+
+vsock_perf utility
+-------------------
+'vsock_perf' is a simple tool to measure vsock performance. It works in
+sender/receiver modes: sender connect to peer at the specified port and
+starts data transmission to the receiver. After data processing is done,
+it prints several metrics(see below).
+
+Usage:
+# run as sender
+# connect to CID 2, port 1234, send 1G of data, tx buf size is 1M
+./vsock_perf --sender 2 --port 1234 --bytes 1G --buf-size 1M
+
+Output:
+tx performance: A Gbits/s
+
+Output explanation:
+A is calculated as "number of bits to send" / "time in tx loop"
+
+# run as receiver
+# listen port 1234, rx buf size is 1M, socket buf size is 1G, SO_RCVLOWAT is 64K
+./vsock_perf --port 1234 --buf-size 1M --vsk-size 1G --rcvlowat 64K
+
+Output:
+rx performance: A Gbits/s
+total in 'read()': B sec
+POLLIN wakeups: C
+average in 'read()': D ns
+
+Output explanation:
+A is calculated as "number of received bits" / "time in rx loop".
+B is time, spent in 'read()' system call(excluding 'poll()')
+C is number of 'poll()' wake ups with POLLIN bit set.
+D is B / C, e.g. average amount of time, spent in single 'read()'.
diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c
new file mode 100644
index 000000000000..a72520338f84
--- /dev/null
+++ b/tools/testing/vsock/vsock_perf.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vsock_perf - benchmark utility for vsock.
+ *
+ * Copyright (C) 2022 SberDevices.
+ *
+ * Author: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
+ */
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <stdint.h>
+#include <poll.h>
+#include <sys/socket.h>
+#include <linux/vm_sockets.h>
+
+#define DEFAULT_BUF_SIZE_BYTES	(128 * 1024)
+#define DEFAULT_TO_SEND_BYTES	(64 * 1024)
+#define DEFAULT_VSOCK_BUF_BYTES (256 * 1024)
+#define DEFAULT_RCVLOWAT_BYTES	1
+#define DEFAULT_PORT		1234
+
+#define BYTES_PER_GB		(1024 * 1024 * 1024ULL)
+#define NSEC_PER_SEC		(1000000000ULL)
+
+static unsigned int port = DEFAULT_PORT;
+static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES;
+static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES;
+
+static void error(const char *s)
+{
+	perror(s);
+	exit(EXIT_FAILURE);
+}
+
+static time_t current_nsec(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(CLOCK_REALTIME, &ts))
+		error("clock_gettime");
+
+	return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
+}
+
+/* From lib/cmdline.c. */
+static unsigned long memparse(const char *ptr)
+{
+	char *endptr;
+
+	unsigned long long ret = strtoull(ptr, &endptr, 0);
+
+	switch (*endptr) {
+	case 'E':
+	case 'e':
+		ret <<= 10;
+	case 'P':
+	case 'p':
+		ret <<= 10;
+	case 'T':
+	case 't':
+		ret <<= 10;
+	case 'G':
+	case 'g':
+		ret <<= 10;
+	case 'M':
+	case 'm':
+		ret <<= 10;
+	case 'K':
+	case 'k':
+		ret <<= 10;
+		endptr++;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void vsock_increase_buf_size(int fd)
+{
+	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+		       &vsock_buf_bytes, sizeof(vsock_buf_bytes)))
+		error("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");
+
+	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+		       &vsock_buf_bytes, sizeof(vsock_buf_bytes)))
+		error("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");
+}
+
+static int vsock_connect(unsigned int cid, unsigned int port)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = port,
+			.svm_cid = cid,
+		},
+	};
+	int fd;
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+	if (fd < 0) {
+		perror("socket");
+		return -1;
+	}
+
+	if (connect(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+		perror("connect");
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+static float get_gbps(unsigned long bits, time_t ns_delta)
+{
+	return ((float)bits / 1000000000ULL) /
+	       ((float)ns_delta / NSEC_PER_SEC);
+}
+
+static void run_receiver(unsigned long rcvlowat_bytes)
+{
+	unsigned int read_cnt;
+	time_t rx_begin_ns;
+	time_t in_read_ns;
+	size_t total_recv;
+	int client_fd;
+	char *data;
+	int fd;
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = port,
+			.svm_cid = VMADDR_CID_ANY,
+		},
+	};
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} clientaddr;
+
+	socklen_t clientaddr_len = sizeof(clientaddr.svm);
+
+	printf("Run as receiver\n");
+	printf("Listen port %u\n", port);
+	printf("RX buffer %lu bytes\n", buf_size_bytes);
+	printf("vsock buffer %lu bytes\n", vsock_buf_bytes);
+	printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes);
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+	if (fd < 0)
+		error("socket");
+
+	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0)
+		error("bind");
+
+	if (listen(fd, 1) < 0)
+		error("listen");
+
+	client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
+
+	if (client_fd < 0)
+		error("accept");
+
+	vsock_increase_buf_size(client_fd);
+
+	if (setsockopt(client_fd, SOL_SOCKET, SO_RCVLOWAT,
+		       &rcvlowat_bytes,
+		       sizeof(rcvlowat_bytes)))
+		error("setsockopt(SO_RCVLOWAT)");
+
+	data = malloc(buf_size_bytes);
+
+	if (!data) {
+		fprintf(stderr, "'malloc()' failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	read_cnt = 0;
+	in_read_ns = 0;
+	total_recv = 0;
+	rx_begin_ns = current_nsec();
+
+	while (1) {
+		struct pollfd fds = { 0 };
+
+		fds.fd = client_fd;
+		fds.events = POLLIN | POLLERR |
+			     POLLHUP | POLLRDHUP;
+
+		if (poll(&fds, 1, -1) < 0)
+			error("poll");
+
+		if (fds.revents & POLLERR) {
+			fprintf(stderr, "'poll()' error\n");
+			exit(EXIT_FAILURE);
+		}
+
+		if (fds.revents & POLLIN) {
+			ssize_t bytes_read;
+			time_t t;
+
+			t = current_nsec();
+			bytes_read = read(fds.fd, data, buf_size_bytes);
+			in_read_ns += (current_nsec() - t);
+			read_cnt++;
+
+			if (!bytes_read)
+				break;
+
+			if (bytes_read < 0) {
+				perror("read");
+				exit(EXIT_FAILURE);
+			}
+
+			total_recv += bytes_read;
+		}
+
+		if (fds.revents & (POLLHUP | POLLRDHUP))
+			break;
+	}
+
+	printf("total bytes received: %zu\n", total_recv);
+	printf("rx performance: %f Gbits/s\n",
+	       get_gbps(total_recv * 8, current_nsec() - rx_begin_ns));
+	printf("total time in 'read()': %f sec\n", (float)in_read_ns / NSEC_PER_SEC);
+	printf("average time in 'read()': %f ns\n", (float)in_read_ns / read_cnt);
+	printf("POLLIN wakeups: %i\n", read_cnt);
+
+	free(data);
+	close(client_fd);
+	close(fd);
+}
+
+static void run_sender(int peer_cid, unsigned long to_send_bytes)
+{
+	time_t tx_begin_ns;
+	time_t tx_total_ns;
+	size_t total_send;
+	void *data;
+	int fd;
+
+	printf("Run as sender\n");
+	printf("Connect to %i:%u\n", peer_cid, port);
+	printf("Send %lu bytes\n", to_send_bytes);
+	printf("TX buffer %lu bytes\n", buf_size_bytes);
+
+	fd = vsock_connect(peer_cid, port);
+
+	if (fd < 0)
+		exit(EXIT_FAILURE);
+
+	data = malloc(buf_size_bytes);
+
+	if (!data) {
+		fprintf(stderr, "'malloc()' failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(data, 0, buf_size_bytes);
+	total_send = 0;
+	tx_begin_ns = current_nsec();
+
+	while (total_send < to_send_bytes) {
+		ssize_t sent;
+
+		sent = write(fd, data, buf_size_bytes);
+
+		if (sent <= 0)
+			error("write");
+
+		total_send += sent;
+	}
+
+	tx_total_ns = current_nsec() - tx_begin_ns;
+
+	printf("total bytes sent: %zu\n", total_send);
+	printf("tx performance: %f Gbits/s\n",
+	       get_gbps(total_send * 8, tx_total_ns));
+	printf("total time in 'write()': %f sec\n",
+	       (float)tx_total_ns / NSEC_PER_SEC);
+
+	close(fd);
+	free(data);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.val = 'H',
+	},
+	{
+		.name = "sender",
+		.has_arg = required_argument,
+		.val = 'S',
+	},
+	{
+		.name = "port",
+		.has_arg = required_argument,
+		.val = 'P',
+	},
+	{
+		.name = "bytes",
+		.has_arg = required_argument,
+		.val = 'M',
+	},
+	{
+		.name = "buf-size",
+		.has_arg = required_argument,
+		.val = 'B',
+	},
+	{
+		.name = "vsk-size",
+		.has_arg = required_argument,
+		.val = 'V',
+	},
+	{
+		.name = "rcvlowat",
+		.has_arg = required_argument,
+		.val = 'R',
+	},
+	{},
+};
+
+static void usage(void)
+{
+	printf("Usage: ./vsock_perf [--help] [options]\n"
+	       "\n"
+	       "This is benchmarking utility, to test vsock performance.\n"
+	       "It runs in two modes: sender or receiver. In sender mode, it\n"
+	       "connects to the specified CID and starts data transmission.\n"
+	       "\n"
+	       "Options:\n"
+	       "  --help			This message\n"
+	       "  --sender   <cid>		Sender mode (receiver default)\n"
+	       "                                <cid> of the receiver to connect to\n"
+	       "  --port     <port>		Port (default %d)\n"
+	       "  --bytes    <bytes>KMG		Bytes to send (default %d)\n"
+	       "  --buf-size <bytes>KMG		Data buffer size (default %d). In sender mode\n"
+	       "                                it is the buffer size, passed to 'write()'. In\n"
+	       "                                receiver mode it is the buffer size passed to 'read()'.\n"
+	       "  --vsk-size <bytes>KMG		Socket buffer size (default %d)\n"
+	       "  --rcvlowat <bytes>KMG		SO_RCVLOWAT value (default %d)\n"
+	       "\n", DEFAULT_PORT, DEFAULT_TO_SEND_BYTES,
+	       DEFAULT_BUF_SIZE_BYTES, DEFAULT_VSOCK_BUF_BYTES,
+	       DEFAULT_RCVLOWAT_BYTES);
+	exit(EXIT_FAILURE);
+}
+
+static long strtolx(const char *arg)
+{
+	long value;
+	char *end;
+
+	value = strtol(arg, &end, 10);
+
+	if (end != arg + strlen(arg))
+		usage();
+
+	return value;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES;
+	unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES;
+	int peer_cid = -1;
+	bool sender = false;
+
+	while (1) {
+		int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case 'V': /* Peer buffer size. */
+			vsock_buf_bytes = memparse(optarg);
+			break;
+		case 'R': /* SO_RCVLOWAT value. */
+			rcvlowat_bytes = memparse(optarg);
+			break;
+		case 'P': /* Port to connect to. */
+			port = strtolx(optarg);
+			break;
+		case 'M': /* Bytes to send. */
+			to_send_bytes = memparse(optarg);
+			break;
+		case 'B': /* Size of rx/tx buffer. */
+			buf_size_bytes = memparse(optarg);
+			break;
+		case 'S': /* Sender mode. CID to connect to. */
+			peer_cid = strtolx(optarg);
+			sender = true;
+			break;
+		case 'H': /* Help. */
+			usage();
+			break;
+		default:
+			usage();
+		}
+	}
+
+	if (!sender)
+		run_receiver(rcvlowat_bytes);
+	else
+		run_sender(peer_cid, to_send_bytes);
+
+	return 0;
+}
-- 
cgit v1.2.3


From 1c48391bc6739f5e3306919d4b887b92c35d5490 Mon Sep 17 00:00:00 2001
From: Roberto Valenzuela <valenzuelarober@gmail.com>
Date: Fri, 13 Jan 2023 13:02:57 -0500
Subject: selftests/bpf: Fix missing space error

Add the missing space after 'dest' variable assignment.
This change will resolve the following checkpatch.pl
script error:

ERROR: spaces required around that '+=' (ctx:VxW)

Signed-off-by: Roberto Valenzuela <valenzuelarober@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230113180257.39769-1-valenzuelarober@gmail.com
---
 tools/testing/selftests/bpf/progs/test_xdp_vlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index 134768f6b788..cdf3c48d6cbb 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -195,7 +195,7 @@ int  xdp_prognum2(struct xdp_md *ctx)
 
 	/* Moving Ethernet header, dest overlap with src, memmove handle this */
 	dest = data;
-	dest+= VLAN_HDR_SZ;
+	dest += VLAN_HDR_SZ;
 	/*
 	 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
 	 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
-- 
cgit v1.2.3


From 7105f76fb56f5ed66a59bc048bc71e9f100e1d39 Mon Sep 17 00:00:00 2001
From: Ziyang Xuan <william.xuanziyang@huawei.com>
Date: Fri, 13 Jan 2023 17:25:10 +0800
Subject: selftests/bpf: add ipip6 and ip6ip decap to test_tc_tunnel

Add ipip6 and ip6ip decap testcases. Verify that bpf_skb_adjust_room()
correctly decapsulate ipip6 and ip6ip tunnel packets.

Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/dfd2d8cfdf9111bd129170d4345296f53bee6a67.1673574419.git.william.xuanziyang@huawei.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 91 +++++++++++++++++++++-
 tools/testing/selftests/bpf/test_tc_tunnel.sh      | 15 ++--
 2 files changed, 98 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index a0e7762b1e5a..e6e678aa9874 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -38,6 +38,10 @@ static const int cfg_udp_src = 20000;
 #define	VXLAN_FLAGS     0x8
 #define	VXLAN_VNI       1
 
+#ifndef NEXTHDR_DEST
+#define NEXTHDR_DEST	60
+#endif
+
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 						     MPLS_LS_S_MASK | 0xff);
@@ -363,6 +367,61 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 	return TC_ACT_OK;
 }
 
+static int encap_ipv6_ipip6(struct __sk_buff *skb)
+{
+	struct iphdr iph_inner;
+	struct v6hdr h_outer;
+	struct tcphdr tcph;
+	struct ethhdr eth;
+	__u64 flags;
+	int olen;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+			       sizeof(iph_inner)) < 0)
+		return TC_ACT_OK;
+
+	/* filter only packets we want */
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
+			       &tcph, sizeof(tcph)) < 0)
+		return TC_ACT_OK;
+
+	if (tcph.dest != __bpf_constant_htons(cfg_port))
+		return TC_ACT_OK;
+
+	olen = sizeof(h_outer.ip);
+
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+
+	/* add room between mac and network header */
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+		return TC_ACT_SHOT;
+
+	/* prepare new outer network header */
+	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
+	h_outer.ip.version = 6;
+	h_outer.ip.hop_limit = iph_inner.ttl;
+	h_outer.ip.saddr.s6_addr[1] = 0xfd;
+	h_outer.ip.saddr.s6_addr[15] = 1;
+	h_outer.ip.daddr.s6_addr[1] = 0xfd;
+	h_outer.ip.daddr.s6_addr[15] = 2;
+	h_outer.ip.payload_len = iph_inner.tot_len;
+	h_outer.ip.nexthdr = IPPROTO_IPIP;
+
+	/* store new outer network header */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	/* update eth->h_proto */
+	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
+		return TC_ACT_SHOT;
+	eth.h_proto = bpf_htons(ETH_P_IPV6);
+	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 				      __u16 l2_proto)
 {
@@ -461,6 +520,15 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
+SEC("encap_ipip6_none")
+int __encap_ipip6_none(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv6_ipip6(skb);
+	else
+		return TC_ACT_OK;
+}
+
 SEC("encap_ip6gre_none")
 int __encap_ip6gre_none(struct __sk_buff *skb)
 {
@@ -528,13 +596,33 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
 
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
+	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+	struct ipv6_opt_hdr ip6_opt_hdr;
 	struct gre_hdr greh;
 	struct udphdr udph;
 	int olen = len;
 
 	switch (proto) {
 	case IPPROTO_IPIP:
+		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+		break;
 	case IPPROTO_IPV6:
+		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+		break;
+	case NEXTHDR_DEST:
+		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
+				       sizeof(ip6_opt_hdr)) < 0)
+			return TC_ACT_OK;
+		switch (ip6_opt_hdr.nexthdr) {
+		case IPPROTO_IPIP:
+			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+			break;
+		case IPPROTO_IPV6:
+			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+			break;
+		default:
+			return TC_ACT_OK;
+		}
 		break;
 	case IPPROTO_GRE:
 		olen += sizeof(struct gre_hdr);
@@ -569,8 +657,7 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		return TC_ACT_OK;
 	}
 
-	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
-				BPF_F_ADJ_ROOM_FIXED_GSO))
+	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
 		return TC_ACT_SHOT;
 
 	return TC_ACT_OK;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 334bdfeab940..910044f08908 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -100,6 +100,9 @@ if [[ "$#" -eq "0" ]]; then
 	echo "ipip"
 	$0 ipv4 ipip none 100
 
+	echo "ipip6"
+	$0 ipv4 ipip6 none 100
+
 	echo "ip6ip6"
 	$0 ipv6 ip6tnl none 100
 
@@ -224,6 +227,9 @@ elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
 elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
 	ttype="vxlan"
 	targs="id 1 dstport 8472 udp6zerocsumrx"
+elif [[ "$tuntype" == "ipip6" ]]; then
+	ttype="ip6tnl"
+	targs=""
 else
 	ttype=$tuntype
 	targs=""
@@ -233,6 +239,9 @@ fi
 if [[ "${tuntype}" == "sit" ]]; then
 	link_addr1="${ns1_v4}"
 	link_addr2="${ns2_v4}"
+elif [[ "${tuntype}" == "ipip6" ]]; then
+	link_addr1="${ns1_v6}"
+	link_addr2="${ns2_v6}"
 else
 	link_addr1="${addr1}"
 	link_addr2="${addr2}"
@@ -287,12 +296,6 @@ else
 	server_listen
 fi
 
-# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
-if [[ "${tuntype}" == "sit" ]]; then
-	echo OK
-	exit 0
-fi
-
 # serverside, use BPF for decap
 ip netns exec "${ns2}" ip link del dev testtun0
 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
-- 
cgit v1.2.3


From c0f264e4edb60db410462d513e29d2de3ef7de56 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 17 Jan 2023 23:37:05 +0100
Subject: bpf/selftests: Add verifier tests for loading sleepable programs

Adding verifier tests for loading all types od allowed
sleepable programs plus reject for tp_btf type.

Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20230117223705.440975-2-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/sleepable.c | 91 ++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/sleepable.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c
new file mode 100644
index 000000000000..bea0daef908a
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/sleepable.c
@@ -0,0 +1,91 @@
+{
+	"sleepable fentry accept",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_TRACE_FENTRY,
+	.kfunc = "bpf_fentry_test1",
+	.result = ACCEPT,
+	.flags = BPF_F_SLEEPABLE,
+	.runs = -1,
+},
+{
+	"sleepable fexit accept",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_TRACE_FENTRY,
+	.kfunc = "bpf_fentry_test1",
+	.result = ACCEPT,
+	.flags = BPF_F_SLEEPABLE,
+	.runs = -1,
+},
+{
+	"sleepable fmod_ret accept",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_MODIFY_RETURN,
+	.kfunc = "bpf_fentry_test1",
+	.result = ACCEPT,
+	.flags = BPF_F_SLEEPABLE,
+	.runs = -1,
+},
+{
+	"sleepable iter accept",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_TRACE_ITER,
+	.kfunc = "task",
+	.result = ACCEPT,
+	.flags = BPF_F_SLEEPABLE,
+	.runs = -1,
+},
+{
+	"sleepable lsm accept",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.result = ACCEPT,
+	.flags = BPF_F_SLEEPABLE,
+	.runs = -1,
+},
+{
+	"sleepable uprobe accept",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_KPROBE,
+	.kfunc = "bpf_fentry_test1",
+	.result = ACCEPT,
+	.flags = BPF_F_SLEEPABLE,
+	.runs = -1,
+},
+{
+	"sleepable raw tracepoint reject",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_TRACE_RAW_TP,
+	.kfunc = "sched_switch",
+	.result = REJECT,
+	.errstr = "Only fentry/fexit/fmod_ret, lsm, iter and uprobe programs can be sleepable",
+	.flags = BPF_F_SLEEPABLE,
+	.runs = -1,
+},
-- 
cgit v1.2.3


From 92afc5329a5b23d876b215b783d200352d5aaea6 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Wed, 18 Jan 2023 15:56:44 +0800
Subject: selftests/bpf: Fix build errors if CONFIG_NF_CONNTRACK=m

If CONFIG_NF_CONNTRACK=m, there are no definitions of NF_NAT_MANIP_SRC
and NF_NAT_MANIP_DST in vmlinux.h, build test_bpf_nf.c failed.

$ make -C tools/testing/selftests/bpf/

  CLNG-BPF [test_maps] test_bpf_nf.bpf.o
progs/test_bpf_nf.c:160:42: error: use of undeclared identifier 'NF_NAT_MANIP_SRC'
                bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC);
                                                       ^
progs/test_bpf_nf.c:163:42: error: use of undeclared identifier 'NF_NAT_MANIP_DST'
                bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST);
                                                       ^
2 errors generated.

Copy the definitions in include/net/netfilter/nf_nat.h to test_bpf_nf.c,
in order to avoid redefinitions if CONFIG_NF_CONNTRACK=y, rename them with
___local suffix. This is similar with commit 1058b6a78db2 ("selftests/bpf:
Do not fail build if CONFIG_NF_CONNTRACK=m/n").

Fixes: b06b45e82b59 ("selftests/bpf: add tests for bpf_ct_set_nat_info kfunc")
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/1674028604-7113-1-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_bpf_nf.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 227e85e85dda..9fc603c9d673 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -34,6 +34,11 @@ __be16 dport = 0;
 int test_exist_lookup = -ENOENT;
 u32 test_exist_lookup_mark = 0;
 
+enum nf_nat_manip_type___local {
+	NF_NAT_MANIP_SRC___local,
+	NF_NAT_MANIP_DST___local
+};
+
 struct nf_conn;
 
 struct bpf_ct_opts___local {
@@ -58,7 +63,7 @@ int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
 int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
 int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
 int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *,
-			int port, enum nf_nat_manip_type) __ksym;
+			int port, enum nf_nat_manip_type___local) __ksym;
 
 static __always_inline void
 nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
@@ -157,10 +162,10 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 
 		/* snat */
 		saddr.ip = bpf_get_prandom_u32();
-		bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC);
+		bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
 		/* dnat */
 		daddr.ip = bpf_get_prandom_u32();
-		bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST);
+		bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
 
 		ct_ins = bpf_ct_insert_entry(ct);
 		if (ct_ins) {
-- 
cgit v1.2.3


From 3c107f36db061603bee7564fbd6388b1f1879fd3 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 18 Jan 2023 10:09:27 +0800
Subject: selftests/net: mv bpf/nat6to4.c to net folder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are some issues with the bpf/nat6to4.c building.

1. It use TEST_CUSTOM_PROGS, which will add the nat6to4.o to
   kselftest-list file and run by common run_tests.
2. When building the test via `make -C tools/testing/selftests/
   TARGETS="net"`, the nat6to4.o will be build in selftests/net/bpf/
   folder. But in test udpgro_frglist.sh it refers to ../bpf/nat6to4.o.
   The correct path should be ./bpf/nat6to4.o.
3. If building the test via `make -C tools/testing/selftests/ TARGETS="net"
   install`. The nat6to4.o will be installed to kselftest_install/net/
   folder. Then the udpgro_frglist.sh should refer to ./nat6to4.o.

To fix the confusing test path, let's just move the nat6to4.c to net folder
and build it as TEST_GEN_FILES.

Fixes: edae34a3ed92 ("selftests net: add UDP GRO fraglist + bpf self-tests")
Tested-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20230118020927.3971864-1-liuhangbin@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/Makefile          |  50 ++++-
 tools/testing/selftests/net/bpf/Makefile      |  51 -----
 tools/testing/selftests/net/bpf/nat6to4.c     | 285 --------------------------
 tools/testing/selftests/net/nat6to4.c         | 285 ++++++++++++++++++++++++++
 tools/testing/selftests/net/udpgro_frglist.sh |   8 +-
 5 files changed, 337 insertions(+), 342 deletions(-)
 delete mode 100644 tools/testing/selftests/net/bpf/Makefile
 delete mode 100644 tools/testing/selftests/net/bpf/nat6to4.c
 create mode 100644 tools/testing/selftests/net/nat6to4.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3007e98a6d64..47314f0b3006 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -75,14 +75,60 @@ TEST_GEN_PROGS += so_incoming_cpu
 TEST_PROGS += sctp_vrf.sh
 TEST_GEN_FILES += sctp_hello
 TEST_GEN_FILES += csum
+TEST_GEN_FILES += nat6to4.o
 
 TEST_FILES := settings
 
 include ../lib.mk
 
-include bpf/Makefile
-
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
 $(OUTPUT)/bind_bhash: LDLIBS += -lpthread
+
+# Rules to generate bpf obj nat6to4.o
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(abspath ../../../lib/bpf)
+APIDIR := $(abspath ../../../include/uapi)
+
+CCINCLUDE += -I../bpf
+CCINCLUDE += -I../../../../usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+	mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS)
+	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
+	   $(APIDIR)/linux/bpf.h					       \
+	   | $(BUILD_DIR)/libbpf
+	$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/     \
+		    EXTRA_CFLAGS='-g -O0'				       \
+		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN := $(SCRATCH_DIR)
diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile
deleted file mode 100644
index 4abaf16d2077..000000000000
--- a/tools/testing/selftests/net/bpf/Makefile
+++ /dev/null
@@ -1,51 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-CLANG ?= clang
-SCRATCH_DIR := $(OUTPUT)/tools
-BUILD_DIR := $(SCRATCH_DIR)/build
-BPFDIR := $(abspath ../../../lib/bpf)
-APIDIR := $(abspath ../../../include/uapi)
-
-CCINCLUDE += -I../../bpf
-CCINCLUDE += -I../../../../../usr/include/
-CCINCLUDE += -I$(SCRATCH_DIR)/include
-
-BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
-
-MAKE_DIRS := $(BUILD_DIR)/libbpf $(OUTPUT)/bpf
-$(MAKE_DIRS):
-	mkdir -p $@
-
-TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
-all: $(TEST_CUSTOM_PROGS)
-
-# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
-# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
-#
-# Use '-idirafter': Don't interfere with include mechanics except where the
-# build would have failed anyways.
-define get_sys_includes
-$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
-	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
-endef
-
-ifneq ($(CROSS_COMPILE),)
-CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
-endif
-
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-
-$(TEST_CUSTOM_PROGS): $(OUTPUT)/%.o: %.c $(BPFOBJ) | $(MAKE_DIRS)
-	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
-
-$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
-	   $(APIDIR)/linux/bpf.h					       \
-	   | $(BUILD_DIR)/libbpf
-	$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/     \
-		    EXTRA_CFLAGS='-g -O0'				       \
-		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR)
-
diff --git a/tools/testing/selftests/net/bpf/nat6to4.c b/tools/testing/selftests/net/bpf/nat6to4.c
deleted file mode 100644
index ac54c36b25fc..000000000000
--- a/tools/testing/selftests/net/bpf/nat6to4.c
+++ /dev/null
@@ -1,285 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * This code is taken from the Android Open Source Project and the author
- * (Maciej Żenczykowski) has gave permission to relicense it under the
- * GPLv2. Therefore this program is free software;
- * You can redistribute it and/or modify it under the terms of the GNU
- * General Public License version 2 as published by the Free Software
- * Foundation
-
- * The original headers, including the original license headers, are
- * included below for completeness.
- *
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <linux/bpf.h>
-#include <linux/if.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/pkt_cls.h>
-#include <linux/swab.h>
-#include <stdbool.h>
-#include <stdint.h>
-
-
-#include <linux/udp.h>
-
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_endian.h>
-
-#define IP_DF 0x4000  // Flag: "Don't Fragment"
-
-SEC("schedcls/ingress6/nat_6")
-int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
-{
-	const int l2_header_size =  sizeof(struct ethhdr);
-	void *data = (void *)(long)skb->data;
-	const void *data_end = (void *)(long)skb->data_end;
-	const struct ethhdr * const eth = data;  // used iff is_ethernet
-	const struct ipv6hdr * const ip6 =  (void *)(eth + 1);
-
-	// Require ethernet dst mac address to be our unicast address.
-	if  (skb->pkt_type != PACKET_HOST)
-		return TC_ACT_OK;
-
-	// Must be meta-ethernet IPv6 frame
-	if (skb->protocol != bpf_htons(ETH_P_IPV6))
-		return TC_ACT_OK;
-
-	// Must have (ethernet and) ipv6 header
-	if (data + l2_header_size + sizeof(*ip6) > data_end)
-		return TC_ACT_OK;
-
-	// Ethertype - if present - must be IPv6
-	if (eth->h_proto != bpf_htons(ETH_P_IPV6))
-		return TC_ACT_OK;
-
-	// IP version must be 6
-	if (ip6->version != 6)
-		return TC_ACT_OK;
-	// Maximum IPv6 payload length that can be translated to IPv4
-	if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
-		return TC_ACT_OK;
-	switch (ip6->nexthdr) {
-	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
-	case IPPROTO_UDP:  // address means there is no need to update their checksums.
-	case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
-	case IPPROTO_ESP:  // since there is never a checksum to update.
-		break;
-	default:  // do not know how to handle anything else
-		return TC_ACT_OK;
-	}
-
-	struct ethhdr eth2;  // used iff is_ethernet
-
-	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
-	eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
-
-	struct iphdr ip = {
-		.version = 4,                                                      // u4
-		.ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
-		.tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
-		.tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
-		.id = 0,                                                           // u16
-		.frag_off = bpf_htons(IP_DF),                                          // u16
-		.ttl = ip6->hop_limit,                                             // u8
-		.protocol = ip6->nexthdr,                                          // u8
-		.check = 0,                                                        // u16
-		.saddr = 0x0201a8c0,                            // u32
-		.daddr = 0x0101a8c0,                                         // u32
-	};
-
-	// Calculate the IPv4 one's complement checksum of the IPv4 header.
-	__wsum sum4 = 0;
-
-	for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
-		sum4 += ((__u16 *)&ip)[i];
-
-	// Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
-	ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
-
-	// Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
-	__wsum sum6 = 0;
-	// We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
-	for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
-		sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
-
-	// Note that there is no L4 checksum update: we are relying on the checksum neutrality
-	// of the ipv6 address chosen by netd's ClatdController.
-
-	// Packet mutations begin - point of no return, but if this first modification fails
-	// the packet is probably still pristine, so let clatd handle it.
-	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
-		return TC_ACT_OK;
-	bpf_csum_update(skb, sum6);
-
-	data = (void *)(long)skb->data;
-	data_end = (void *)(long)skb->data_end;
-	if (data + l2_header_size + sizeof(struct iphdr) > data_end)
-		return TC_ACT_SHOT;
-
-	struct ethhdr *new_eth = data;
-
-	// Copy over the updated ethernet header
-	*new_eth = eth2;
-
-	// Copy over the new ipv4 header.
-	*(struct iphdr *)(new_eth + 1) = ip;
-	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
-}
-
-SEC("schedcls/egress4/snat4")
-int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
-{
-	const int l2_header_size =  sizeof(struct ethhdr);
-	void *data = (void *)(long)skb->data;
-	const void *data_end = (void *)(long)skb->data_end;
-	const struct ethhdr *const eth = data;  // used iff is_ethernet
-	const struct iphdr *const ip4 = (void *)(eth + 1);
-
-	// Must be meta-ethernet IPv4 frame
-	if (skb->protocol != bpf_htons(ETH_P_IP))
-		return TC_ACT_OK;
-
-	// Must have ipv4 header
-	if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
-		return TC_ACT_OK;
-
-	// Ethertype - if present - must be IPv4
-	if (eth->h_proto != bpf_htons(ETH_P_IP))
-		return TC_ACT_OK;
-
-	// IP version must be 4
-	if (ip4->version != 4)
-		return TC_ACT_OK;
-
-	// We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
-	if (ip4->ihl != 5)
-		return TC_ACT_OK;
-
-	// Maximum IPv6 payload length that can be translated to IPv4
-	if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
-		return TC_ACT_OK;
-
-	// Calculate the IPv4 one's complement checksum of the IPv4 header.
-	__wsum sum4 = 0;
-
-	for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
-		sum4 += ((__u16 *)ip4)[i];
-
-	// Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
-	// for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
-	if (sum4 != 0xFFFF)
-		return TC_ACT_OK;
-
-	// Minimum IPv4 total length is the size of the header
-	if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
-		return TC_ACT_OK;
-
-	// We are incapable of dealing with IPv4 fragments
-	if (ip4->frag_off & ~bpf_htons(IP_DF))
-		return TC_ACT_OK;
-
-	switch (ip4->protocol) {
-	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
-	case IPPROTO_GRE:  // address means there is no need to update their checksums.
-	case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
-		break;         // since there is never a checksum to update.
-
-	case IPPROTO_UDP:  // See above comment, but must also have UDP header...
-		if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
-			return TC_ACT_OK;
-		const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
-		// If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
-		// checksum.  Otherwise the network or more likely the NAT64 gateway might
-		// drop the packet because in most cases IPv6/UDP packets with a zero checksum
-		// are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
-		if (!uh->check)
-			return TC_ACT_OK;
-		break;
-
-	default:  // do not know how to handle anything else
-		return TC_ACT_OK;
-	}
-	struct ethhdr eth2;  // used iff is_ethernet
-
-	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
-	eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
-
-	struct ipv6hdr ip6 = {
-		.version = 6,                                    // __u8:4
-		.priority = ip4->tos >> 4,                       // __u8:4
-		.flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
-		.payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
-		.nexthdr = ip4->protocol,                        // __u8
-		.hop_limit = ip4->ttl,                           // __u8
-	};
-	ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
-	ip6.saddr.in6_u.u6_addr32[1] = 0;
-	ip6.saddr.in6_u.u6_addr32[2] = 0;
-	ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
-	ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
-	ip6.daddr.in6_u.u6_addr32[1] = 0;
-	ip6.daddr.in6_u.u6_addr32[2] = 0;
-	ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
-
-	// Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
-	__wsum sum6 = 0;
-	// We'll end up with a non-zero sum due to ip6.version == 6
-	for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
-		sum6 += ((__u16 *)&ip6)[i];
-
-	// Packet mutations begin - point of no return, but if this first modification fails
-	// the packet is probably still pristine, so let clatd handle it.
-	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
-		return TC_ACT_OK;
-
-	// This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
-	// In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
-	// thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
-	// However, we've already verified the ipv4 checksum is correct and thus 0.
-	// Thus we only need to add the ipv6 header's sum.
-	//
-	// bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
-	// (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
-	bpf_csum_update(skb, sum6);
-
-	// bpf_skb_change_proto() invalidates all pointers - reload them.
-	data = (void *)(long)skb->data;
-	data_end = (void *)(long)skb->data_end;
-
-	// I cannot think of any valid way for this error condition to trigger, however I do
-	// believe the explicit check is required to keep the in kernel ebpf verifier happy.
-	if (data + l2_header_size + sizeof(ip6) > data_end)
-		return TC_ACT_SHOT;
-
-	struct ethhdr *new_eth = data;
-
-	// Copy over the updated ethernet header
-	*new_eth = eth2;
-	// Copy over the new ipv4 header.
-	*(struct ipv6hdr *)(new_eth + 1) = ip6;
-	return TC_ACT_OK;
-}
-
-char _license[] SEC("license") = ("GPL");
diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.c
new file mode 100644
index 000000000000..ac54c36b25fc
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has gave permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * You can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation
+
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000  // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr * const eth = data;  // used iff is_ethernet
+	const struct ipv6hdr * const ip6 =  (void *)(eth + 1);
+
+	// Require ethernet dst mac address to be our unicast address.
+	if  (skb->pkt_type != PACKET_HOST)
+		return TC_ACT_OK;
+
+	// Must be meta-ethernet IPv6 frame
+	if (skb->protocol != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// Must have (ethernet and) ipv6 header
+	if (data + l2_header_size + sizeof(*ip6) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv6
+	if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// IP version must be 6
+	if (ip6->version != 6)
+		return TC_ACT_OK;
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+		return TC_ACT_OK;
+	switch (ip6->nexthdr) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_UDP:  // address means there is no need to update their checksums.
+	case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
+	case IPPROTO_ESP:  // since there is never a checksum to update.
+		break;
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
+
+	struct iphdr ip = {
+		.version = 4,                                                      // u4
+		.ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
+		.tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
+		.tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
+		.id = 0,                                                           // u16
+		.frag_off = bpf_htons(IP_DF),                                          // u16
+		.ttl = ip6->hop_limit,                                             // u8
+		.protocol = ip6->nexthdr,                                          // u8
+		.check = 0,                                                        // u16
+		.saddr = 0x0201a8c0,                            // u32
+		.daddr = 0x0101a8c0,                                         // u32
+	};
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)&ip)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
+
+	// Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+	for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+		sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
+
+	// Note that there is no L4 checksum update: we are relying on the checksum neutrality
+	// of the ipv6 address chosen by netd's ClatdController.
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+		return TC_ACT_OK;
+	bpf_csum_update(skb, sum6);
+
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+	if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+
+	// Copy over the new ipv4 header.
+	*(struct iphdr *)(new_eth + 1) = ip;
+	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr *const eth = data;  // used iff is_ethernet
+	const struct iphdr *const ip4 = (void *)(eth + 1);
+
+	// Must be meta-ethernet IPv4 frame
+	if (skb->protocol != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// Must have ipv4 header
+	if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv4
+	if (eth->h_proto != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// IP version must be 4
+	if (ip4->version != 4)
+		return TC_ACT_OK;
+
+	// We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+	if (ip4->ihl != 5)
+		return TC_ACT_OK;
+
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+		return TC_ACT_OK;
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)ip4)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	// for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+	if (sum4 != 0xFFFF)
+		return TC_ACT_OK;
+
+	// Minimum IPv4 total length is the size of the header
+	if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+		return TC_ACT_OK;
+
+	// We are incapable of dealing with IPv4 fragments
+	if (ip4->frag_off & ~bpf_htons(IP_DF))
+		return TC_ACT_OK;
+
+	switch (ip4->protocol) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_GRE:  // address means there is no need to update their checksums.
+	case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
+		break;         // since there is never a checksum to update.
+
+	case IPPROTO_UDP:  // See above comment, but must also have UDP header...
+		if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+			return TC_ACT_OK;
+		const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+		// If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+		// checksum.  Otherwise the network or more likely the NAT64 gateway might
+		// drop the packet because in most cases IPv6/UDP packets with a zero checksum
+		// are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
+		if (!uh->check)
+			return TC_ACT_OK;
+		break;
+
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
+
+	struct ipv6hdr ip6 = {
+		.version = 6,                                    // __u8:4
+		.priority = ip4->tos >> 4,                       // __u8:4
+		.flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
+		.payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
+		.nexthdr = ip4->protocol,                        // __u8
+		.hop_limit = ip4->ttl,                           // __u8
+	};
+	ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.saddr.in6_u.u6_addr32[1] = 0;
+	ip6.saddr.in6_u.u6_addr32[2] = 0;
+	ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+	ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.daddr.in6_u.u6_addr32[1] = 0;
+	ip6.daddr.in6_u.u6_addr32[2] = 0;
+	ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+	// Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6.version == 6
+	for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+		sum6 += ((__u16 *)&ip6)[i];
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+		return TC_ACT_OK;
+
+	// This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+	// In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+	// thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+	// However, we've already verified the ipv4 checksum is correct and thus 0.
+	// Thus we only need to add the ipv6 header's sum.
+	//
+	// bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+	// (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
+	bpf_csum_update(skb, sum6);
+
+	// bpf_skb_change_proto() invalidates all pointers - reload them.
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+
+	// I cannot think of any valid way for this error condition to trigger, however I do
+	// believe the explicit check is required to keep the in kernel ebpf verifier happy.
+	if (data + l2_header_size + sizeof(ip6) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+	// Copy over the new ipv4 header.
+	*(struct ipv6hdr *)(new_eth + 1) = ip6;
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index c9c4b9d65839..0a6359bed0b9 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -40,8 +40,8 @@ run_one() {
 
 	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
 	tc -n "${PEER_NS}" qdisc add dev veth1 clsact
-	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6  direct-action
-	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
+	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6  direct-action
+	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
         echo ${rx_args}
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
 
@@ -88,8 +88,8 @@ if [ ! -f ${BPF_FILE} ]; then
 	exit -1
 fi
 
-if [ ! -f bpf/nat6to4.o ]; then
-	echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first"
+if [ ! -f nat6to4.o ]; then
+	echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first"
 	exit -1
 fi
 
-- 
cgit v1.2.3


From edac4b5b185ed3d2717b8267e28e0a1187c0bc08 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 16 Jan 2023 11:10:08 +0100
Subject: selftests/bpf: Add
 serial_test_kprobe_multi_bench_attach_kernel/module tests

Add bench test for module portion of the symbols as well.

  # ./test_progs -v -t kprobe_multi_bench_attach_module
  bpf_testmod.ko is already unloaded.
  Loading bpf_testmod.ko...
  Successfully loaded bpf_testmod.ko.
  test_kprobe_multi_bench_attach:PASS:get_syms 0 nsec
  test_kprobe_multi_bench_attach:PASS:kprobe_multi_empty__open_and_load 0 nsec
  test_kprobe_multi_bench_attach:PASS:bpf_program__attach_kprobe_multi_opts 0 nsec
  test_kprobe_multi_bench_attach: found 26620 functions
  test_kprobe_multi_bench_attach: attached in   0.182s
  test_kprobe_multi_bench_attach: detached in   0.082s
  #96      kprobe_multi_bench_attach_module:OK
  Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED
  Successfully unloaded bpf_testmod.ko.

It's useful for testing kprobe multi link modules resolving.

Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20230116101009.23694-3-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/kprobe_multi_test.c      | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index c6f37e825f11..113dba349a57 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -322,7 +322,7 @@ static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
 	return strcmp((const char *) key1, (const char *) key2) == 0;
 }
 
-static int get_syms(char ***symsp, size_t *cntp)
+static int get_syms(char ***symsp, size_t *cntp, bool kernel)
 {
 	size_t cap = 0, cnt = 0, i;
 	char *name = NULL, **syms = NULL;
@@ -349,8 +349,9 @@ static int get_syms(char ***symsp, size_t *cntp)
 	}
 
 	while (fgets(buf, sizeof(buf), f)) {
-		/* skip modules */
-		if (strchr(buf, '['))
+		if (kernel && strchr(buf, '['))
+			continue;
+		if (!kernel && !strchr(buf, '['))
 			continue;
 
 		free(name);
@@ -404,7 +405,7 @@ error:
 	return err;
 }
 
-void serial_test_kprobe_multi_bench_attach(void)
+static void test_kprobe_multi_bench_attach(bool kernel)
 {
 	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
 	struct kprobe_multi_empty *skel = NULL;
@@ -415,7 +416,7 @@ void serial_test_kprobe_multi_bench_attach(void)
 	char **syms = NULL;
 	size_t cnt = 0, i;
 
-	if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
+	if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
 		return;
 
 	skel = kprobe_multi_empty__open_and_load();
@@ -453,6 +454,14 @@ cleanup:
 	}
 }
 
+void serial_test_kprobe_multi_bench_attach(void)
+{
+	if (test__start_subtest("kernel"))
+		test_kprobe_multi_bench_attach(true);
+	if (test__start_subtest("modules"))
+		test_kprobe_multi_bench_attach(false);
+}
+
 void test_kprobe_multi_test(void)
 {
 	if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
-- 
cgit v1.2.3


From 79168a669d8125453c8a271115f1ffd4294e61f6 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 21 Jan 2023 05:52:31 +0530
Subject: bpf: Fix missing var_off check for ARG_PTR_TO_DYNPTR

Currently, the dynptr function is not checking the variable offset part
of PTR_TO_STACK that it needs to check. The fixed offset is considered
when computing the stack pointer index, but if the variable offset was
not a constant (such that it could not be accumulated in reg->off), we
will end up a discrepency where runtime pointer does not point to the
actual stack slot we mark as STACK_DYNPTR.

It is impossible to precisely track dynptr state when variable offset is
not constant, hence, just like bpf_timer, kptr, bpf_spin_lock, etc.
simply reject the case where reg->var_off is not constant. Then,
consider both reg->off and reg->var_off.value when computing the stack
pointer index.

A new helper dynptr_get_spi is introduced to hide over these details
since the dynptr needs to be located in multiple places outside the
process_dynptr_func checks, hence once we know it's a PTR_TO_STACK, we
need to enforce these checks in all places.

Note that it is disallowed for unprivileged users to have a non-constant
var_off, so this problem should only be possible to trigger from
programs having CAP_PERFMON. However, its effects can vary.

Without the fix, it is possible to replace the contents of the dynptr
arbitrarily by making verifier mark different stack slots than actual
location and then doing writes to the actual stack address of dynptr at
runtime.

Fixes: 97e03f521050 ("bpf: Add verifier support for dynptrs")
Acked-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                              | 84 +++++++++++++++++-----
 .../selftests/bpf/prog_tests/kfunc_dynptr_param.c  |  2 +-
 tools/testing/selftests/bpf/progs/dynptr_fail.c    |  4 +-
 3 files changed, 69 insertions(+), 21 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 39d8ee38c338..76afdbea425a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -638,11 +638,34 @@ static void print_liveness(struct bpf_verifier_env *env,
 		verbose(env, "D");
 }
 
-static int get_spi(s32 off)
+static int __get_spi(s32 off)
 {
 	return (-off - 1) / BPF_REG_SIZE;
 }
 
+static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+	int off, spi;
+
+	if (!tnum_is_const(reg->var_off)) {
+		verbose(env, "dynptr has to be at a constant offset\n");
+		return -EINVAL;
+	}
+
+	off = reg->off + reg->var_off.value;
+	if (off % BPF_REG_SIZE) {
+		verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+		return -EINVAL;
+	}
+
+	spi = __get_spi(off);
+	if (spi < 1) {
+		verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+		return -EINVAL;
+	}
+	return spi;
+}
+
 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
 {
 	int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
@@ -754,7 +777,9 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 	enum bpf_dynptr_type type;
 	int spi, i, id;
 
-	spi = get_spi(reg->off);
+	spi = dynptr_get_spi(env, reg);
+	if (spi < 0)
+		return spi;
 
 	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
 		return -EINVAL;
@@ -792,7 +817,9 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
 	struct bpf_func_state *state = func(env, reg);
 	int spi, i;
 
-	spi = get_spi(reg->off);
+	spi = dynptr_get_spi(env, reg);
+	if (spi < 0)
+		return spi;
 
 	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
 		return -EINVAL;
@@ -844,7 +871,11 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_
 	if (reg->type == CONST_PTR_TO_DYNPTR)
 		return false;
 
-	spi = get_spi(reg->off);
+	spi = dynptr_get_spi(env, reg);
+	if (spi < 0)
+		return false;
+
+	/* We will do check_mem_access to check and update stack bounds later */
 	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
 		return true;
 
@@ -860,14 +891,15 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_
 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
 	struct bpf_func_state *state = func(env, reg);
-	int spi;
-	int i;
+	int spi, i;
 
 	/* This already represents first slot of initialized bpf_dynptr */
 	if (reg->type == CONST_PTR_TO_DYNPTR)
 		return true;
 
-	spi = get_spi(reg->off);
+	spi = dynptr_get_spi(env, reg);
+	if (spi < 0)
+		return false;
 	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
 	    !state->stack[spi].spilled_ptr.dynptr.first_slot)
 		return false;
@@ -896,7 +928,9 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
 	if (reg->type == CONST_PTR_TO_DYNPTR) {
 		return reg->dynptr.type == dynptr_type;
 	} else {
-		spi = get_spi(reg->off);
+		spi = dynptr_get_spi(env, reg);
+		if (spi < 0)
+			return false;
 		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
 	}
 }
@@ -2429,7 +2463,9 @@ static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *
 	 */
 	if (reg->type == CONST_PTR_TO_DYNPTR)
 		return 0;
-	spi = get_spi(reg->off);
+	spi = dynptr_get_spi(env, reg);
+	if (spi < 0)
+		return spi;
 	/* Caller ensures dynptr is valid and initialized, which means spi is in
 	 * bounds and spi is the first dynptr slot. Simply mark stack slot as
 	 * read.
@@ -5992,12 +6028,15 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
 	}
 	/* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
 	 * check_func_arg_reg_off's logic. We only need to check offset
-	 * alignment for PTR_TO_STACK.
+	 * and its alignment for PTR_TO_STACK.
 	 */
-	if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
-		verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
-		return -EINVAL;
+	if (reg->type == PTR_TO_STACK) {
+		int err = dynptr_get_spi(env, reg);
+
+		if (err < 0)
+			return err;
 	}
+
 	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
 	 *		 constructing a mutable bpf_dynptr object.
 	 *
@@ -6405,15 +6444,16 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	}
 }
 
-static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
 	struct bpf_func_state *state = func(env, reg);
 	int spi;
 
 	if (reg->type == CONST_PTR_TO_DYNPTR)
 		return reg->ref_obj_id;
-
-	spi = get_spi(reg->off);
+	spi = dynptr_get_spi(env, reg);
+	if (spi < 0)
+		return spi;
 	return state->stack[spi].spilled_ptr.ref_obj_id;
 }
 
@@ -6487,7 +6527,9 @@ skip_type_check:
 			 * PTR_TO_STACK.
 			 */
 			if (reg->type == PTR_TO_STACK) {
-				spi = get_spi(reg->off);
+				spi = dynptr_get_spi(env, reg);
+				if (spi < 0)
+					return spi;
 				if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
 				    !state->stack[spi].spilled_ptr.ref_obj_id) {
 					verbose(env, "arg %d is an unacquired reference\n", regno);
@@ -7977,13 +8019,19 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
 			if (arg_type_is_dynptr(fn->arg_type[i])) {
 				struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
+				int ref_obj_id;
 
 				if (meta.ref_obj_id) {
 					verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
 					return -EFAULT;
 				}
 
-				meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
+				ref_obj_id = dynptr_ref_obj_id(env, reg);
+				if (ref_obj_id < 0) {
+					verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
+					return ref_obj_id;
+				}
+				meta.ref_obj_id = ref_obj_id;
 				break;
 			}
 		}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
index a9229260a6ce..72800b1e8395 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
@@ -18,7 +18,7 @@ static struct {
 	const char *expected_verifier_err_msg;
 	int expected_runtime_err;
 } kfunc_dynptr_tests[] = {
-	{"not_valid_dynptr", "Expected an initialized dynptr as arg #1", 0},
+	{"not_valid_dynptr", "cannot pass in dynptr at an offset=-8", 0},
 	{"not_ptr_to_stack", "arg#0 expected pointer to stack or dynptr_ptr", 0},
 	{"dynptr_data_null", NULL, -EBADMSG},
 };
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 78debc1b3820..02d57b95cf6e 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -382,7 +382,7 @@ int invalid_helper1(void *ctx)
 
 /* A dynptr can't be passed into a helper function at a non-zero offset */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("cannot pass in dynptr at an offset=-8")
 int invalid_helper2(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -584,7 +584,7 @@ int invalid_read4(void *ctx)
 
 /* Initializing a dynptr on an offset should fail */
 SEC("?raw_tp")
-__failure __msg("invalid write to stack")
+__failure __msg("cannot pass in dynptr at an offset=0")
 int invalid_offset(void *ctx)
 {
 	struct bpf_dynptr ptr;
-- 
cgit v1.2.3


From ef8fc7a07c0e161841779d6fe3f6acd5a05c547c Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 21 Jan 2023 05:52:32 +0530
Subject: bpf: Fix partial dynptr stack slot reads/writes

Currently, while reads are disallowed for dynptr stack slots, writes are
not. Reads don't work from both direct access and helpers, while writes
do work in both cases, but have the effect of overwriting the slot_type.

While this is fine, handling for a few edge cases is missing. Firstly,
a user can overwrite the stack slots of dynptr partially.

Consider the following layout:
spi: [d][d][?]
      2  1  0

First slot is at spi 2, second at spi 1.
Now, do a write of 1 to 8 bytes for spi 1.

This will essentially either write STACK_MISC for all slot_types or
STACK_MISC and STACK_ZERO (in case of size < BPF_REG_SIZE partial write
of zeroes). The end result is that slot is scrubbed.

Now, the layout is:
spi: [d][m][?]
      2  1  0

Suppose if user initializes spi = 1 as dynptr.
We get:
spi: [d][d][d]
      2  1  0

But this time, both spi 2 and spi 1 have first_slot = true.

Now, when passing spi 2 to dynptr helper, it will consider it as
initialized as it does not check whether second slot has first_slot ==
false. And spi 1 should already work as normal.

This effectively replaced size + offset of first dynptr, hence allowing
invalid OOB reads and writes.

Make a few changes to protect against this:
When writing to PTR_TO_STACK using BPF insns, when we touch spi of a
STACK_DYNPTR type, mark both first and second slot (regardless of which
slot we touch) as STACK_INVALID. Reads are already prevented.

Second, prevent writing	to stack memory from helpers if the range may
contain any STACK_DYNPTR slots. Reads are already prevented.

For helpers, we cannot allow it to destroy dynptrs from the writes as
depending on arguments, helper may take uninit_mem and dynptr both at
the same time. This would mean that helper may write to uninit_mem
before it reads the dynptr, which would be bad.

PTR_TO_MEM: [?????dd]

Depending on the code inside the helper, it may end up overwriting the
dynptr contents first and then read those as the dynptr argument.

Verifier would only simulate destruction when it does byte by byte
access simulation in check_helper_call for meta.access_size, and
fail to catch this case, as it happens after argument checks.

The same would need to be done for any other non-trivial objects created
on the stack in the future, such as bpf_list_head on stack, or
bpf_rb_root on stack.

A common misunderstanding in the current code is that MEM_UNINIT means
writes, but note that writes may also be performed even without
MEM_UNINIT in case of helpers, in that case the code after handling meta
&& meta->raw_mode will complain when it sees STACK_DYNPTR. So that
invalid read case also covers writes to potential STACK_DYNPTR slots.
The only loophole was in case of meta->raw_mode which simulated writes
through instructions which could overwrite them.

A future series sequenced after this will focus on the clean up of
helper access checks and bugs around that.

Fixes: 97e03f521050 ("bpf: Add verifier support for dynptrs")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                           | 88 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/dynptr_fail.c |  6 +-
 2 files changed, 91 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 76afdbea425a..5c7f29ca94ec 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -769,6 +769,8 @@ static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
 	__mark_dynptr_reg(reg, type, true);
 }
 
+static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
+				        struct bpf_func_state *state, int spi);
 
 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 				   enum bpf_arg_type arg_type, int insn_idx)
@@ -863,6 +865,55 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
 	return 0;
 }
 
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+			       struct bpf_reg_state *reg);
+
+static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
+				        struct bpf_func_state *state, int spi)
+{
+	int i;
+
+	/* We always ensure that STACK_DYNPTR is never set partially,
+	 * hence just checking for slot_type[0] is enough. This is
+	 * different for STACK_SPILL, where it may be only set for
+	 * 1 byte, so code has to use is_spilled_reg.
+	 */
+	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
+		return 0;
+
+	/* Reposition spi to first slot */
+	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
+		spi = spi + 1;
+
+	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
+		verbose(env, "cannot overwrite referenced dynptr\n");
+		return -EINVAL;
+	}
+
+	mark_stack_slot_scratched(env, spi);
+	mark_stack_slot_scratched(env, spi - 1);
+
+	/* Writing partially to one dynptr stack slot destroys both. */
+	for (i = 0; i < BPF_REG_SIZE; i++) {
+		state->stack[spi].slot_type[i] = STACK_INVALID;
+		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
+	}
+
+	/* TODO: Invalidate any slices associated with this dynptr */
+
+	/* Do not release reference state, we are destroying dynptr on stack,
+	 * not using some helper to release it. Just reset register.
+	 */
+	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
+
+	/* Same reason as unmark_stack_slots_dynptr above */
+	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
+	return 0;
+}
+
 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
 	struct bpf_func_state *state = func(env, reg);
@@ -3391,6 +3442,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
 	}
 
+	err = destroy_if_dynptr_stack_slot(env, state, spi);
+	if (err)
+		return err;
+
 	mark_stack_slot_scratched(env, spi);
 	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
 	    !register_is_null(reg) && env->bpf_capable) {
@@ -3504,6 +3559,14 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
 	if (err)
 		return err;
 
+	for (i = min_off; i < max_off; i++) {
+		int spi;
+
+		spi = __get_spi(i);
+		err = destroy_if_dynptr_stack_slot(env, state, spi);
+		if (err)
+			return err;
+	}
 
 	/* Variable offset writes destroy any spilled pointers in range. */
 	for (i = min_off; i < max_off; i++) {
@@ -5531,6 +5594,31 @@ static int check_stack_range_initialized(
 	}
 
 	if (meta && meta->raw_mode) {
+		/* Ensure we won't be overwriting dynptrs when simulating byte
+		 * by byte access in check_helper_call using meta.access_size.
+		 * This would be a problem if we have a helper in the future
+		 * which takes:
+		 *
+		 *	helper(uninit_mem, len, dynptr)
+		 *
+		 * Now, uninint_mem may overlap with dynptr pointer. Hence, it
+		 * may end up writing to dynptr itself when touching memory from
+		 * arg 1. This can be relaxed on a case by case basis for known
+		 * safe cases, but reject due to the possibilitiy of aliasing by
+		 * default.
+		 */
+		for (i = min_off; i < max_off + access_size; i++) {
+			int stack_off = -i - 1;
+
+			spi = __get_spi(i);
+			/* raw_mode may write past allocated_stack */
+			if (state->allocated_stack <= stack_off)
+				continue;
+			if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
+				verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
+				return -EACCES;
+			}
+		}
 		meta->access_size = access_size;
 		meta->regno = regno;
 		return 0;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 02d57b95cf6e..9dc3f23a8270 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -420,7 +420,7 @@ int invalid_write1(void *ctx)
  * offset
  */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("cannot overwrite referenced dynptr")
 int invalid_write2(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -444,7 +444,7 @@ int invalid_write2(void *ctx)
  * non-const offset
  */
 SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("cannot overwrite referenced dynptr")
 int invalid_write3(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -476,7 +476,7 @@ static int invalid_write4_callback(__u32 index, void *data)
  * be invalidated as a dynptr
  */
 SEC("?raw_tp")
-__failure __msg("arg 1 is an unacquired reference")
+__failure __msg("cannot overwrite referenced dynptr")
 int invalid_write4(void *ctx)
 {
 	struct bpf_dynptr ptr;
-- 
cgit v1.2.3


From f8064ab90d6644bc8338d2d7ff6a0d6e7a1b2ef3 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 21 Jan 2023 05:52:33 +0530
Subject: bpf: Invalidate slices on destruction of dynptrs on stack

The previous commit implemented destroy_if_dynptr_stack_slot. It
destroys the dynptr which given spi belongs to, but still doesn't
invalidate the slices that belong to such a dynptr. While for the case
of referenced dynptr, we don't allow their overwrite and return an error
early, we still allow it and destroy the dynptr for unreferenced dynptr.

To be able to enable precise and scoped invalidation of dynptr slices in
this case, we must be able to associate the source dynptr of slices that
have been obtained using bpf_dynptr_data. When doing destruction, only
slices belonging to the dynptr being destructed should be invalidated,
and nothing else. Currently, dynptr slices belonging to different
dynptrs are indistinguishible.

Hence, allocate a unique id to each dynptr (CONST_PTR_TO_DYNPTR and
those on stack). This will be stored as part of reg->id. Whenever using
bpf_dynptr_data, transfer this unique dynptr id to the returned
PTR_TO_MEM_OR_NULL slice pointer, and store it in a new per-PTR_TO_MEM
dynptr_id register state member.

Finally, after establishing such a relationship between dynptrs and
their slices, implement precise invalidation logic that only invalidates
slices belong to the destroyed dynptr in destroy_if_dynptr_stack_slot.

Acked-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h                    |  5 +-
 kernel/bpf/verifier.c                           | 74 +++++++++++++++++++++----
 tools/testing/selftests/bpf/progs/dynptr_fail.c |  4 +-
 3 files changed, 68 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 127058cfec47..aa83de1fe755 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -70,7 +70,10 @@ struct bpf_reg_state {
 			u32 btf_id;
 		};
 
-		u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
+		struct { /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
+			u32 mem_size;
+			u32 dynptr_id; /* for dynptr slices */
+		};
 
 		/* For dynptr stack slots */
 		struct {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5c7f29ca94ec..01cb802776fd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -255,6 +255,7 @@ struct bpf_call_arg_meta {
 	int mem_size;
 	u64 msize_max_value;
 	int ref_obj_id;
+	int dynptr_id;
 	int map_uid;
 	int func_id;
 	struct btf *btf;
@@ -750,23 +751,27 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
 
 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
 			      enum bpf_dynptr_type type,
-			      bool first_slot);
+			      bool first_slot, int dynptr_id);
 
 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
 				struct bpf_reg_state *reg);
 
-static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
+static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *sreg1,
 				   struct bpf_reg_state *sreg2,
 				   enum bpf_dynptr_type type)
 {
-	__mark_dynptr_reg(sreg1, type, true);
-	__mark_dynptr_reg(sreg2, type, false);
+	int id = ++env->id_gen;
+
+	__mark_dynptr_reg(sreg1, type, true, id);
+	__mark_dynptr_reg(sreg2, type, false, id);
 }
 
-static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
+static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
+			       struct bpf_reg_state *reg,
 			       enum bpf_dynptr_type type)
 {
-	__mark_dynptr_reg(reg, type, true);
+	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
 }
 
 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
@@ -795,7 +800,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
 	if (type == BPF_DYNPTR_TYPE_INVALID)
 		return -EINVAL;
 
-	mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
+	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
 			       &state->stack[spi - 1].spilled_ptr, type);
 
 	if (dynptr_type_refcounted(type)) {
@@ -871,7 +876,9 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
 				        struct bpf_func_state *state, int spi)
 {
-	int i;
+	struct bpf_func_state *fstate;
+	struct bpf_reg_state *dreg;
+	int i, dynptr_id;
 
 	/* We always ensure that STACK_DYNPTR is never set partially,
 	 * hence just checking for slot_type[0] is enough. This is
@@ -899,7 +906,19 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
 		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
 	}
 
-	/* TODO: Invalidate any slices associated with this dynptr */
+	dynptr_id = state->stack[spi].spilled_ptr.id;
+	/* Invalidate any slices associated with this dynptr */
+	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
+		/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
+		if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
+			continue;
+		if (dreg->dynptr_id == dynptr_id) {
+			if (!env->allow_ptr_leaks)
+				__mark_reg_not_init(env, dreg);
+			else
+				__mark_reg_unknown(env, dreg);
+		}
+	}));
 
 	/* Do not release reference state, we are destroying dynptr on stack,
 	 * not using some helper to release it. Just reset register.
@@ -1562,7 +1581,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
 }
 
 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
-			      bool first_slot)
+			      bool first_slot, int dynptr_id)
 {
 	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
 	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
@@ -1570,6 +1589,8 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type ty
 	 */
 	__mark_reg_known_zero(reg);
 	reg->type = CONST_PTR_TO_DYNPTR;
+	/* Give each dynptr a unique id to uniquely associate slices to it. */
+	reg->id = dynptr_id;
 	reg->dynptr.type = type;
 	reg->dynptr.first_slot = first_slot;
 }
@@ -6532,6 +6553,19 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	}
 }
 
+static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+	struct bpf_func_state *state = func(env, reg);
+	int spi;
+
+	if (reg->type == CONST_PTR_TO_DYNPTR)
+		return reg->id;
+	spi = dynptr_get_spi(env, reg);
+	if (spi < 0)
+		return spi;
+	return state->stack[spi].spilled_ptr.id;
+}
+
 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
 	struct bpf_func_state *state = func(env, reg);
@@ -7601,7 +7635,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
 	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
 	 */
 	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
-	mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
+	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
 	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
 
 	/* unused */
@@ -8107,18 +8141,31 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
 			if (arg_type_is_dynptr(fn->arg_type[i])) {
 				struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
-				int ref_obj_id;
+				int id, ref_obj_id;
+
+				if (meta.dynptr_id) {
+					verbose(env, "verifier internal error: meta.dynptr_id already set\n");
+					return -EFAULT;
+				}
 
 				if (meta.ref_obj_id) {
 					verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
 					return -EFAULT;
 				}
 
+				id = dynptr_id(env, reg);
+				if (id < 0) {
+					verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+					return id;
+				}
+
 				ref_obj_id = dynptr_ref_obj_id(env, reg);
 				if (ref_obj_id < 0) {
 					verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
 					return ref_obj_id;
 				}
+
+				meta.dynptr_id = id;
 				meta.ref_obj_id = ref_obj_id;
 				break;
 			}
@@ -8275,6 +8322,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		return -EFAULT;
 	}
 
+	if (is_dynptr_ref_function(func_id))
+		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
+
 	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
 		/* For release_reference() */
 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 9dc3f23a8270..e43000c63c66 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -67,7 +67,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr)
  * bpf_ringbuf_submit/discard_dynptr call
  */
 SEC("?raw_tp")
-__failure __msg("Unreleased reference id=1")
+__failure __msg("Unreleased reference id=2")
 int ringbuf_missing_release1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -80,7 +80,7 @@ int ringbuf_missing_release1(void *ctx)
 }
 
 SEC("?raw_tp")
-__failure __msg("Unreleased reference id=2")
+__failure __msg("Unreleased reference id=4")
 int ringbuf_missing_release2(void *ctx)
 {
 	struct bpf_dynptr ptr1, ptr2;
-- 
cgit v1.2.3


From 91b875a5e43b3a8dec4fbdca067c8860004b5f0e Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Sat, 21 Jan 2023 05:52:37 +0530
Subject: selftests/bpf: convenience macro for use with 'asm volatile' blocks

A set of macros useful for writing naked BPF functions using inline
assembly. E.g. as follows:

struct map_struct {
	...
} map SEC(".maps");

SEC(...)
__naked int foo_test(void)
{
	asm volatile(
		"r0 = 0;"
		"*(u64*)(r10 - 8) = r0;"
		"r1 = %[map] ll;"
		"r2 = r10;"
		"r2 += -8;"
		"call %[bpf_map_lookup_elem];"
		"r0 = 0;"
		"exit;"
		:
		: __imm(bpf_map_lookup_elem),
		  __imm_addr(map)
		: __clobber_all);
}

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
[ Kartikeya: Add acks, include __clobber_common from Andrii ]
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-9-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/bpf_misc.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 4a01ea9113bf..2d7b89b447b2 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -7,6 +7,13 @@
 #define __success		__attribute__((btf_decl_tag("comment:test_expect_success")))
 #define __log_level(lvl)	__attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
 
+/* Convenience macro for use with 'asm volatile' blocks */
+#define __naked __attribute__((naked))
+#define __clobber_all "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"
+#define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory"
+#define __imm(name) [name]"i"(name)
+#define __imm_addr(name) [name]"i"(&name)
+
 #if defined(__TARGET_ARCH_x86)
 #define SYSCALL_WRAPPER 1
 #define SYS_PREFIX "__x64_"
-- 
cgit v1.2.3


From f4d24edf1b9249e43282ac2572d43d9ad10faf43 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 21 Jan 2023 05:52:38 +0530
Subject: selftests/bpf: Add dynptr pruning tests

Add verifier tests that verify the new pruning behavior for STACK_DYNPTR
slots, and ensure that state equivalence takes into account changes to
the old and current verifier state correctly. Also ensure that the
stacksafe changes are actually enabling pruning in case states are
equivalent from pruning PoV.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-10-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/dynptr_fail.c | 141 ++++++++++++++++++++++++
 1 file changed, 141 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index e43000c63c66..f1e047877279 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -35,6 +35,13 @@ struct {
 	__type(value, __u32);
 } array_map3 SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array_map4 SEC(".maps");
+
 struct sample {
 	int pid;
 	long value;
@@ -653,3 +660,137 @@ int dynptr_from_mem_invalid_api(void *ctx)
 
 	return 0;
 }
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_pruning_overwrite(struct __sk_buff *ctx)
+{
+	asm volatile (
+		"r9 = 0xeB9F;				\
+		 r6 = %[ringbuf] ll;			\
+		 r1 = r6;				\
+		 r2 = 8;				\
+		 r3 = 0;				\
+		 r4 = r10;				\
+		 r4 += -16;				\
+		 call %[bpf_ringbuf_reserve_dynptr];	\
+		 if r0 == 0 goto pjmp1;			\
+		 goto pjmp2;				\
+	pjmp1:						\
+		 *(u64 *)(r10 - 16) = r9;		\
+	pjmp2:						\
+		 r1 = r10;				\
+		 r1 += -16;				\
+		 r2 = 0;				\
+		 call %[bpf_ringbuf_discard_dynptr];	"
+		:
+		: __imm(bpf_ringbuf_reserve_dynptr),
+		  __imm(bpf_ringbuf_discard_dynptr),
+		  __imm_addr(ringbuf)
+		: __clobber_all
+	);
+	return 0;
+}
+
+SEC("?tc")
+__success __msg("12: safe") __log_level(2)
+int dynptr_pruning_stacksafe(struct __sk_buff *ctx)
+{
+	asm volatile (
+		"r9 = 0xeB9F;				\
+		 r6 = %[ringbuf] ll;			\
+		 r1 = r6;				\
+		 r2 = 8;				\
+		 r3 = 0;				\
+		 r4 = r10;				\
+		 r4 += -16;				\
+		 call %[bpf_ringbuf_reserve_dynptr];	\
+		 if r0 == 0 goto stjmp1;		\
+		 goto stjmp2;				\
+	stjmp1:						\
+		 r9 = r9;				\
+	stjmp2:						\
+		 r1 = r10;				\
+		 r1 += -16;				\
+		 r2 = 0;				\
+		 call %[bpf_ringbuf_discard_dynptr];	"
+		:
+		: __imm(bpf_ringbuf_reserve_dynptr),
+		  __imm(bpf_ringbuf_discard_dynptr),
+		  __imm_addr(ringbuf)
+		: __clobber_all
+	);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_pruning_type_confusion(struct __sk_buff *ctx)
+{
+	asm volatile (
+		"r6 = %[array_map4] ll;			\
+		 r7 = %[ringbuf] ll;			\
+		 r1 = r6;				\
+		 r2 = r10;				\
+		 r2 += -8;				\
+		 r9 = 0;				\
+		 *(u64 *)(r2 + 0) = r9;			\
+		 r3 = r10;				\
+		 r3 += -24;				\
+		 r9 = 0xeB9FeB9F;			\
+		 *(u64 *)(r10 - 16) = r9;		\
+		 *(u64 *)(r10 - 24) = r9;		\
+		 r9 = 0;				\
+		 r4 = 0;				\
+		 r8 = r2;				\
+		 call %[bpf_map_update_elem];		\
+		 r1 = r6;				\
+		 r2 = r8;				\
+		 call %[bpf_map_lookup_elem];		\
+		 if r0 != 0 goto tjmp1;			\
+		 exit;					\
+	tjmp1:						\
+		 r8 = r0;				\
+		 r1 = r7;				\
+		 r2 = 8;				\
+		 r3 = 0;				\
+		 r4 = r10;				\
+		 r4 += -16;				\
+		 r0 = *(u64 *)(r0 + 0);			\
+		 call %[bpf_ringbuf_reserve_dynptr];	\
+		 if r0 == 0 goto tjmp2;			\
+		 r8 = r8;				\
+		 r8 = r8;				\
+		 r8 = r8;				\
+		 r8 = r8;				\
+		 r8 = r8;				\
+		 r8 = r8;				\
+		 r8 = r8;				\
+		 goto tjmp3;				\
+	tjmp2:						\
+		 *(u64 *)(r10 - 8) = r9;		\
+		 *(u64 *)(r10 - 16) = r9;		\
+		 r1 = r8;				\
+		 r1 += 8;				\
+		 r2 = 0;				\
+		 r3 = 0;				\
+		 r4 = r10;				\
+		 r4 += -16;				\
+		 call %[bpf_dynptr_from_mem];		\
+	tjmp3:						\
+		 r1 = r10;				\
+		 r1 += -16;				\
+		 r2 = 0;				\
+		 call %[bpf_ringbuf_discard_dynptr];	"
+		:
+		: __imm(bpf_map_update_elem),
+		  __imm(bpf_map_lookup_elem),
+		  __imm(bpf_ringbuf_reserve_dynptr),
+		  __imm(bpf_dynptr_from_mem),
+		  __imm(bpf_ringbuf_discard_dynptr),
+		  __imm_addr(array_map4),
+		  __imm_addr(ringbuf)
+		: __clobber_all
+	);
+	return 0;
+}
-- 
cgit v1.2.3


From ef4810135396735c1a6b1c343c3cc4fe4be96a43 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 21 Jan 2023 05:52:39 +0530
Subject: selftests/bpf: Add dynptr var_off tests

Ensure that variable offset is handled correctly, and verifier takes
both fixed and variable part into account. Also ensures that only
constant var_off is allowed.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-11-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/dynptr_fail.c | 40 +++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index f1e047877279..2d899f2bebb0 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -794,3 +794,43 @@ int dynptr_pruning_type_confusion(struct __sk_buff *ctx)
 	);
 	return 0;
 }
+
+SEC("?tc")
+__failure __msg("dynptr has to be at a constant offset") __log_level(2)
+int dynptr_var_off_overwrite(struct __sk_buff *ctx)
+{
+	asm volatile (
+		"r9 = 16;				\
+		 *(u32 *)(r10 - 4) = r9;		\
+		 r8 = *(u32 *)(r10 - 4);		\
+		 if r8 >= 0 goto vjmp1;			\
+		 r0 = 1;				\
+		 exit;					\
+	vjmp1:						\
+		 if r8 <= 16 goto vjmp2;		\
+		 r0 = 1;				\
+		 exit;					\
+	vjmp2:						\
+		 r8 &= 16;				\
+		 r1 = %[ringbuf] ll;			\
+		 r2 = 8;				\
+		 r3 = 0;				\
+		 r4 = r10;				\
+		 r4 += -32;				\
+		 r4 += r8;				\
+		 call %[bpf_ringbuf_reserve_dynptr];	\
+		 r9 = 0xeB9F;				\
+		 *(u64 *)(r10 - 16) = r9;		\
+		 r1 = r10;				\
+		 r1 += -32;				\
+		 r1 += r8;				\
+		 r2 = 0;				\
+		 call %[bpf_ringbuf_discard_dynptr];	"
+		:
+		: __imm(bpf_ringbuf_reserve_dynptr),
+		  __imm(bpf_ringbuf_discard_dynptr),
+		  __imm_addr(ringbuf)
+		: __clobber_all
+	);
+	return 0;
+}
-- 
cgit v1.2.3


From 011edc8e49b8551dfb6cfcc8601d05e029cf5994 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 21 Jan 2023 05:52:40 +0530
Subject: selftests/bpf: Add dynptr partial slot overwrite tests

Try creating a dynptr, then overwriting second slot with first slot of
another dynptr. Then, the first slot of first dynptr should also be
invalidated, but without our fix that does not happen. As a consequence,
the unfixed case allows passing first dynptr (as the kernel check only
checks for slot_type and then first_slot == true).

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-12-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/dynptr_fail.c | 66 +++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 2d899f2bebb0..1cbec5468879 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -834,3 +834,69 @@ int dynptr_var_off_overwrite(struct __sk_buff *ctx)
 	);
 	return 0;
 }
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_partial_slot_invalidate(struct __sk_buff *ctx)
+{
+	asm volatile (
+		"r6 = %[ringbuf] ll;			\
+		 r7 = %[array_map4] ll;			\
+		 r1 = r7;				\
+		 r2 = r10;				\
+		 r2 += -8;				\
+		 r9 = 0;				\
+		 *(u64 *)(r2 + 0) = r9;			\
+		 r3 = r2;				\
+		 r4 = 0;				\
+		 r8 = r2;				\
+		 call %[bpf_map_update_elem];		\
+		 r1 = r7;				\
+		 r2 = r8;				\
+		 call %[bpf_map_lookup_elem];		\
+		 if r0 != 0 goto sjmp1;			\
+		 exit;					\
+	sjmp1:						\
+		 r7 = r0;				\
+		 r1 = r6;				\
+		 r2 = 8;				\
+		 r3 = 0;				\
+		 r4 = r10;				\
+		 r4 += -24;				\
+		 call %[bpf_ringbuf_reserve_dynptr];	\
+		 *(u64 *)(r10 - 16) = r9;		\
+		 r1 = r7;				\
+		 r2 = 8;				\
+		 r3 = 0;				\
+		 r4 = r10;				\
+		 r4 += -16;				\
+		 call %[bpf_dynptr_from_mem];		\
+		 r1 = r10;				\
+		 r1 += -512;				\
+		 r2 = 488;				\
+		 r3 = r10;				\
+		 r3 += -24;				\
+		 r4 = 0;				\
+		 r5 = 0;				\
+		 call %[bpf_dynptr_read];		\
+		 r8 = 1;				\
+		 if r0 != 0 goto sjmp2;			\
+		 r8 = 0;				\
+	sjmp2:						\
+		 r1 = r10;				\
+		 r1 += -24;				\
+		 r2 = 0;				\
+		 call %[bpf_ringbuf_discard_dynptr];	"
+		:
+		: __imm(bpf_map_update_elem),
+		  __imm(bpf_map_lookup_elem),
+		  __imm(bpf_ringbuf_reserve_dynptr),
+		  __imm(bpf_ringbuf_discard_dynptr),
+		  __imm(bpf_dynptr_from_mem),
+		  __imm(bpf_dynptr_read),
+		  __imm_addr(ringbuf),
+		  __imm_addr(array_map4)
+		: __clobber_all
+	);
+	return 0;
+}
-- 
cgit v1.2.3


From ae8e354c497af625eaecd3d86e04f9087762d42b Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 21 Jan 2023 05:52:41 +0530
Subject: selftests/bpf: Add dynptr helper tests

First test that we allow overwriting dynptr slots and reinitializing
them in unreferenced case, and disallow overwriting for referenced case.
Include tests to ensure slices obtained from destroyed dynptrs are being
invalidated on their destruction. The destruction needs to be scoped, as
in slices of dynptr A should not be invalidated when dynptr B is
destroyed. Next, test that MEM_UNINIT doesn't allow writing dynptr stack
slots.

Acked-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230121002241.2113993-13-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/dynptr_fail.c | 192 ++++++++++++++++++++++++
 1 file changed, 192 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 1cbec5468879..5950ad6ec2e6 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -900,3 +900,195 @@ int dynptr_partial_slot_invalidate(struct __sk_buff *ctx)
 	);
 	return 0;
 }
+
+/* Test that it is allowed to overwrite unreferenced dynptr. */
+SEC("?raw_tp")
+__success
+int dynptr_overwrite_unref(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	if (get_map_val_dynptr(&ptr))
+		return 0;
+	if (get_map_val_dynptr(&ptr))
+		return 0;
+	if (get_map_val_dynptr(&ptr))
+		return 0;
+
+	return 0;
+}
+
+/* Test that slices are invalidated on reinitializing a dynptr. */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int dynptr_invalidate_slice_reinit(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	__u8 *p;
+
+	if (get_map_val_dynptr(&ptr))
+		return 0;
+	p = bpf_dynptr_data(&ptr, 0, 1);
+	if (!p)
+		return 0;
+	if (get_map_val_dynptr(&ptr))
+		return 0;
+	/* this should fail */
+	return *p;
+}
+
+/* Invalidation of dynptr slices on destruction of dynptr should not miss
+ * mem_or_null pointers.
+ */
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=percpu_ptr_")
+int dynptr_invalidate_slice_or_null(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	__u8 *p;
+
+	if (get_map_val_dynptr(&ptr))
+		return 0;
+
+	p = bpf_dynptr_data(&ptr, 0, 1);
+	*(__u8 *)&ptr = 0;
+	/* this should fail */
+	bpf_this_cpu_ptr(p);
+	return 0;
+}
+
+/* Destruction of dynptr should also any slices obtained from it */
+SEC("?raw_tp")
+__failure __msg("R7 invalid mem access 'scalar'")
+int dynptr_invalidate_slice_failure(void *ctx)
+{
+	struct bpf_dynptr ptr1;
+	struct bpf_dynptr ptr2;
+	__u8 *p1, *p2;
+
+	if (get_map_val_dynptr(&ptr1))
+		return 0;
+	if (get_map_val_dynptr(&ptr2))
+		return 0;
+
+	p1 = bpf_dynptr_data(&ptr1, 0, 1);
+	if (!p1)
+		return 0;
+	p2 = bpf_dynptr_data(&ptr2, 0, 1);
+	if (!p2)
+		return 0;
+
+	*(__u8 *)&ptr1 = 0;
+	/* this should fail */
+	return *p1;
+}
+
+/* Invalidation of slices should be scoped and should not prevent dereferencing
+ * slices of another dynptr after destroying unrelated dynptr
+ */
+SEC("?raw_tp")
+__success
+int dynptr_invalidate_slice_success(void *ctx)
+{
+	struct bpf_dynptr ptr1;
+	struct bpf_dynptr ptr2;
+	__u8 *p1, *p2;
+
+	if (get_map_val_dynptr(&ptr1))
+		return 1;
+	if (get_map_val_dynptr(&ptr2))
+		return 1;
+
+	p1 = bpf_dynptr_data(&ptr1, 0, 1);
+	if (!p1)
+		return 1;
+	p2 = bpf_dynptr_data(&ptr2, 0, 1);
+	if (!p2)
+		return 1;
+
+	*(__u8 *)&ptr1 = 0;
+	return *p2;
+}
+
+/* Overwriting referenced dynptr should be rejected */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int dynptr_overwrite_ref(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+	/* this should fail */
+	if (get_map_val_dynptr(&ptr))
+		bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* Reject writes to dynptr slot from bpf_dynptr_read */
+SEC("?raw_tp")
+__failure __msg("potential write to dynptr at off=-16")
+int dynptr_read_into_slot(void *ctx)
+{
+	union {
+		struct {
+			char _pad[48];
+			struct bpf_dynptr ptr;
+		};
+		char buf[64];
+	} data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &data.ptr);
+	/* this should fail */
+	bpf_dynptr_read(data.buf, sizeof(data.buf), &data.ptr, 0, 0);
+
+	return 0;
+}
+
+/* Reject writes to dynptr slot for uninit arg */
+SEC("?raw_tp")
+__failure __msg("potential write to dynptr at off=-16")
+int uninit_write_into_slot(void *ctx)
+{
+	struct {
+		char buf[64];
+		struct bpf_dynptr ptr;
+	} data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 80, 0, &data.ptr);
+	/* this should fail */
+	bpf_get_current_comm(data.buf, 80);
+
+	return 0;
+}
+
+static int callback(__u32 index, void *data)
+{
+        *(__u32 *)data = 123;
+
+        return 0;
+}
+
+/* If the dynptr is written into in a callback function, its data
+ * slices should be invalidated as well.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int invalid_data_slices(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	__u32 *slice;
+
+	if (get_map_val_dynptr(&ptr))
+		return 0;
+
+	slice = bpf_dynptr_data(&ptr, 0, sizeof(__u32));
+	if (!slice)
+		return 0;
+
+	bpf_loop(10, callback, &ptr, 0);
+
+	/* this should fail */
+	*slice = 1;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 40535704624e980a560fd68635d98c2984ded572 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 19 Jan 2023 14:15:25 -0800
Subject: selftests/bpf: Update expected test_offload.py messages

Generic check has a different error message, update the selftest.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-7-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/test_offload.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 7cb1bc05e5cf..40cba8d368d9 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1039,7 +1039,7 @@ try:
     offload = bpf_pinned("/sys/fs/bpf/offload")
     ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
     fail(ret == 0, "attached offloaded XDP program to drv")
-    check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args)
+    check_extack(err, "Using offloaded program without HW_MODE flag is not supported.", args)
     rm("/sys/fs/bpf/offload")
     sim.wait_for_flush()
 
@@ -1088,12 +1088,12 @@ try:
     ret, _, err = sim.set_xdp(pinned, "offload",
                               fail=False, include_stderr=True)
     fail(ret == 0, "Pinned program loaded for a different device accepted")
-    check_extack_nsim(err, "program bound to different dev.", args)
+    check_extack(err, "Program bound to different device.", args)
     simdev2.remove()
     ret, _, err = sim.set_xdp(pinned, "offload",
                               fail=False, include_stderr=True)
     fail(ret == 0, "Pinned program loaded for a removed device accepted")
-    check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+    check_extack(err, "Program bound to different device.", args)
     rm(pin_file)
     bpftool_prog_list_wait(expected=0)
 
@@ -1334,12 +1334,12 @@ try:
     ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
                                fail=False, include_stderr=True)
     fail(ret == 0, "cross-ASIC program allowed")
-    check_extack_nsim(err, "program bound to different dev.", args)
+    check_extack(err, "Program bound to different device.", args)
     for d in simdevB.nsims:
         ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
                                 fail=False, include_stderr=True)
         fail(ret == 0, "cross-ASIC program allowed")
-        check_extack_nsim(err, "program bound to different dev.", args)
+        check_extack(err, "Program bound to different device.", args)
 
     start_test("Test multi-dev ASIC cross-dev map reuse...")
 
-- 
cgit v1.2.3


From e2a46d54d7a100c79c9d645184db9ce594c8f5f6 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 19 Jan 2023 14:15:30 -0800
Subject: selftests/bpf: Verify xdp_metadata xdp->af_xdp path

- create new netns
- create veth pair (veTX+veRX)
- setup AF_XDP socket for both interfaces
- attach bpf to veRX
- send packet via veTX
- verify the packet has expected metadata at veRX

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-12-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 tools/testing/selftests/bpf/Makefile               |   2 +-
 .../selftests/bpf/prog_tests/xdp_metadata.c        | 410 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/xdp_metadata.c   |  64 ++++
 tools/testing/selftests/bpf/progs/xdp_metadata2.c  |  23 ++
 tools/testing/selftests/bpf/xdp_metadata.h         |  15 +
 6 files changed, 514 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_metadata.c
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_metadata2.c
 create mode 100644 tools/testing/selftests/bpf/xdp_metadata.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 96e8371f5c2a..b9ef9cc61d36 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -86,5 +86,6 @@ xdp_adjust_tail                          # case-128 err 0 errno 28 retval 1 size
 xdp_bonding                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
 xdp_bpf2bpf                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
 xdp_do_redirect                          # prog_run_max_size unexpected error: -22 (errno 22)
+xdp_metadata                             # JIT does not support calling kernel function                                (kfunc)
 xdp_synproxy                             # JIT does not support calling kernel function                                (kfunc)
 xfrm_info                                # JIT does not support calling kernel function                                (kfunc)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 22533a18705e..e09bef2b7502 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -527,7 +527,7 @@ TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
 			 btf_helpers.c flow_dissector_load.h		\
-			 cap_helpers.c test_loader.c
+			 cap_helpers.c test_loader.c xsk.c
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       $(OUTPUT)/liburandom_read.so			\
 		       $(OUTPUT)/xdp_synproxy				\
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
new file mode 100644
index 000000000000..e033d48288c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "xdp_metadata.skel.h"
+#include "xdp_metadata2.skel.h"
+#include "xdp_metadata.h"
+#include "xsk.h"
+
+#include <bpf/btf.h>
+#include <linux/errqueue.h>
+#include <linux/if_link.h>
+#include <linux/net_tstamp.h>
+#include <linux/udp.h>
+#include <sys/mman.h>
+#include <net/if.h>
+#include <poll.h>
+
+#define TX_NAME "veTX"
+#define RX_NAME "veRX"
+
+#define UDP_PAYLOAD_BYTES 4
+
+#define AF_XDP_SOURCE_PORT 1234
+#define AF_XDP_CONSUMER_PORT 8080
+
+#define UMEM_NUM 16
+#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
+#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
+#define XDP_FLAGS XDP_FLAGS_DRV_MODE
+#define QUEUE_ID 0
+
+#define TX_ADDR "10.0.0.1"
+#define RX_ADDR "10.0.0.2"
+#define PREFIX_LEN "8"
+#define FAMILY AF_INET
+
+#define SYS(cmd) ({ \
+	if (!ASSERT_OK(system(cmd), (cmd))) \
+		goto out; \
+})
+
+struct xsk {
+	void *umem_area;
+	struct xsk_umem *umem;
+	struct xsk_ring_prod fill;
+	struct xsk_ring_cons comp;
+	struct xsk_ring_prod tx;
+	struct xsk_ring_cons rx;
+	struct xsk_socket *socket;
+};
+
+static int open_xsk(int ifindex, struct xsk *xsk)
+{
+	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+	const struct xsk_socket_config socket_config = {
+		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.bind_flags = XDP_COPY,
+	};
+	const struct xsk_umem_config umem_config = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
+	};
+	__u32 idx;
+	u64 addr;
+	int ret;
+	int i;
+
+	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+	if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
+		return -1;
+
+	ret = xsk_umem__create(&xsk->umem,
+			       xsk->umem_area, UMEM_SIZE,
+			       &xsk->fill,
+			       &xsk->comp,
+			       &umem_config);
+	if (!ASSERT_OK(ret, "xsk_umem__create"))
+		return ret;
+
+	ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
+				 xsk->umem,
+				 &xsk->rx,
+				 &xsk->tx,
+				 &socket_config);
+	if (!ASSERT_OK(ret, "xsk_socket__create"))
+		return ret;
+
+	/* First half of umem is for TX. This way address matches 1-to-1
+	 * to the completion queue index.
+	 */
+
+	for (i = 0; i < UMEM_NUM / 2; i++) {
+		addr = i * UMEM_FRAME_SIZE;
+		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
+	}
+
+	/* Second half of umem is for RX. */
+
+	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
+	if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
+		return ret;
+	if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
+		return -1;
+
+	for (i = 0; i < UMEM_NUM / 2; i++) {
+		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
+		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
+		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
+	}
+	xsk_ring_prod__submit(&xsk->fill, ret);
+
+	return 0;
+}
+
+static void close_xsk(struct xsk *xsk)
+{
+	if (xsk->umem)
+		xsk_umem__delete(xsk->umem);
+	if (xsk->socket)
+		xsk_socket__delete(xsk->socket);
+	munmap(xsk->umem, UMEM_SIZE);
+}
+
+static void ip_csum(struct iphdr *iph)
+{
+	__u32 sum = 0;
+	__u16 *p;
+	int i;
+
+	iph->check = 0;
+	p = (void *)iph;
+	for (i = 0; i < sizeof(*iph) / sizeof(*p); i++)
+		sum += p[i];
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	iph->check = ~sum;
+}
+
+static int generate_packet(struct xsk *xsk, __u16 dst_port)
+{
+	struct xdp_desc *tx_desc;
+	struct udphdr *udph;
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	void *data;
+	__u32 idx;
+	int ret;
+
+	ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
+	if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
+		return -1;
+
+	tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
+	tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
+	printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
+	data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+
+	eth = data;
+	iph = (void *)(eth + 1);
+	udph = (void *)(iph + 1);
+
+	memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
+	memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
+	eth->h_proto = htons(ETH_P_IP);
+
+	iph->version = 0x4;
+	iph->ihl = 0x5;
+	iph->tos = 0x9;
+	iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
+	iph->id = 0;
+	iph->frag_off = 0;
+	iph->ttl = 0;
+	iph->protocol = IPPROTO_UDP;
+	ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
+	ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
+	ip_csum(iph);
+
+	udph->source = htons(AF_XDP_SOURCE_PORT);
+	udph->dest = htons(dst_port);
+	udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
+	udph->check = 0;
+
+	memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
+
+	tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
+	xsk_ring_prod__submit(&xsk->tx, 1);
+
+	ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
+	if (!ASSERT_GE(ret, 0, "sendto"))
+		return ret;
+
+	return 0;
+}
+
+static void complete_tx(struct xsk *xsk)
+{
+	__u32 idx;
+	__u64 addr;
+
+	if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
+		addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
+
+		printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
+		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
+		xsk_ring_prod__submit(&xsk->fill, 1);
+	}
+}
+
+static void refill_rx(struct xsk *xsk, __u64 addr)
+{
+	__u32 idx;
+
+	if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
+		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
+		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
+		xsk_ring_prod__submit(&xsk->fill, 1);
+	}
+}
+
+static int verify_xsk_metadata(struct xsk *xsk)
+{
+	const struct xdp_desc *rx_desc;
+	struct pollfd fds = {};
+	struct xdp_meta *meta;
+	struct ethhdr *eth;
+	struct iphdr *iph;
+	__u64 comp_addr;
+	void *data;
+	__u64 addr;
+	__u32 idx;
+	int ret;
+
+	ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+	if (!ASSERT_EQ(ret, 0, "recvfrom"))
+		return -1;
+
+	fds.fd = xsk_socket__fd(xsk->socket);
+	fds.events = POLLIN;
+
+	ret = poll(&fds, 1, 1000);
+	if (!ASSERT_GT(ret, 0, "poll"))
+		return -1;
+
+	ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
+	if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
+		return -2;
+
+	rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
+	comp_addr = xsk_umem__extract_addr(rx_desc->addr);
+	addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
+	printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
+	       xsk, idx, rx_desc->addr, addr, comp_addr);
+	data = xsk_umem__get_data(xsk->umem_area, addr);
+
+	/* Make sure we got the packet offset correctly. */
+
+	eth = data;
+	ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
+	iph = (void *)(eth + 1);
+	ASSERT_EQ((int)iph->version, 4, "iph->version");
+
+	/* custom metadata */
+
+	meta = data - sizeof(struct xdp_meta);
+
+	if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
+		return -1;
+
+	if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
+		return -1;
+
+	xsk_ring_cons__release(&xsk->rx, 1);
+	refill_rx(xsk, comp_addr);
+
+	return 0;
+}
+
+void test_xdp_metadata(void)
+{
+	struct xdp_metadata2 *bpf_obj2 = NULL;
+	struct xdp_metadata *bpf_obj = NULL;
+	struct bpf_program *new_prog, *prog;
+	struct nstoken *tok = NULL;
+	__u32 queue_id = QUEUE_ID;
+	struct bpf_map *prog_arr;
+	struct xsk tx_xsk = {};
+	struct xsk rx_xsk = {};
+	__u32 val, key = 0;
+	int retries = 10;
+	int rx_ifindex;
+	int tx_ifindex;
+	int sock_fd;
+	int ret;
+
+	/* Setup new networking namespace, with a veth pair. */
+
+	SYS("ip netns add xdp_metadata");
+	tok = open_netns("xdp_metadata");
+	SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
+	    " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
+	SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01");
+	SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02");
+	SYS("ip link set dev " TX_NAME " up");
+	SYS("ip link set dev " RX_NAME " up");
+	SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
+	SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+
+	rx_ifindex = if_nametoindex(RX_NAME);
+	tx_ifindex = if_nametoindex(TX_NAME);
+
+	/* Setup separate AF_XDP for TX and RX interfaces. */
+
+	ret = open_xsk(tx_ifindex, &tx_xsk);
+	if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
+		goto out;
+
+	ret = open_xsk(rx_ifindex, &rx_xsk);
+	if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
+		goto out;
+
+	bpf_obj = xdp_metadata__open();
+	if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
+		goto out;
+
+	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
+	bpf_program__set_ifindex(prog, rx_ifindex);
+	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+	if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
+		goto out;
+
+	/* Make sure we can't add dev-bound programs to prog maps. */
+	prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
+	if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
+		goto out;
+
+	val = bpf_program__fd(prog);
+	if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
+					     &val, sizeof(val), BPF_ANY),
+			"update prog_arr"))
+		goto out;
+
+	/* Attach BPF program to RX interface. */
+
+	ret = bpf_xdp_attach(rx_ifindex,
+			     bpf_program__fd(bpf_obj->progs.rx),
+			     XDP_FLAGS, NULL);
+	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+		goto out;
+
+	sock_fd = xsk_socket__fd(rx_xsk.socket);
+	ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
+	if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
+		goto out;
+
+	/* Send packet destined to RX AF_XDP socket. */
+	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
+		       "generate AF_XDP_CONSUMER_PORT"))
+		goto out;
+
+	/* Verify AF_XDP RX packet has proper metadata. */
+	if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0,
+		       "verify_xsk_metadata"))
+		goto out;
+
+	complete_tx(&tx_xsk);
+
+	/* Make sure freplace correctly picks up original bound device
+	 * and doesn't crash.
+	 */
+
+	bpf_obj2 = xdp_metadata2__open();
+	if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
+		goto out;
+
+	new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
+	bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
+
+	if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
+		goto out;
+
+	if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
+		goto out;
+
+	/* Send packet to trigger . */
+	if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
+		       "generate freplace packet"))
+		goto out;
+
+	while (!retries--) {
+		if (bpf_obj2->bss->called)
+			break;
+		usleep(10);
+	}
+	ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
+
+out:
+	close_xsk(&rx_xsk);
+	close_xsk(&tx_xsk);
+	xdp_metadata2__destroy(bpf_obj2);
+	xdp_metadata__destroy(bpf_obj);
+	if (tok)
+		close_netns(tok);
+	system("ip netns del xdp_metadata");
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
new file mode 100644
index 000000000000..77678b034389
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_XSKMAP);
+	__uint(max_entries, 4);
+	__type(key, __u32);
+	__type(value, __u32);
+} xsk SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} prog_arr SEC(".maps");
+
+extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
+					 __u64 *timestamp) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
+				    __u32 *hash) __ksym;
+
+SEC("xdp")
+int rx(struct xdp_md *ctx)
+{
+	void *data, *data_meta;
+	struct xdp_meta *meta;
+	u64 timestamp = -1;
+	int ret;
+
+	/* Reserve enough for all custom metadata. */
+
+	ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+	if (ret != 0)
+		return XDP_DROP;
+
+	data = (void *)(long)ctx->data;
+	data_meta = (void *)(long)ctx->data_meta;
+
+	if (data_meta + sizeof(struct xdp_meta) > data)
+		return XDP_DROP;
+
+	meta = data_meta;
+
+	/* Export metadata. */
+
+	/* We expect veth bpf_xdp_metadata_rx_timestamp to return 0 HW
+	 * timestamp, so put some non-zero value into AF_XDP frame for
+	 * the userspace.
+	 */
+	bpf_xdp_metadata_rx_timestamp(ctx, &timestamp);
+	if (timestamp == 0)
+		meta->rx_timestamp = 1;
+
+	bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
+
+	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata2.c b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
new file mode 100644
index 000000000000..cf69d05451c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
+				    __u32 *hash) __ksym;
+
+int called;
+
+SEC("freplace/rx")
+int freplace_rx(struct xdp_md *ctx)
+{
+	u32 hash = 0;
+	/* Call _any_ metadata function to make sure we don't crash. */
+	bpf_xdp_metadata_rx_hash(ctx, &hash);
+	called++;
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
new file mode 100644
index 000000000000..f6780fbb0a21
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
+
+#ifndef ETH_P_IPV6
+#define ETH_P_IPV6 0x86DD
+#endif
+
+struct xdp_meta {
+	__u64 rx_timestamp;
+	__u32 rx_hash;
+};
-- 
cgit v1.2.3


From 297a3f1241550f6969f65a5efeee9162241daae5 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 19 Jan 2023 14:15:36 -0800
Subject: selftests/bpf: Simple program to dump XDP RX metadata

To be used for verification of driver implementations. Note that
the skb path is gone from the series, but I'm still keeping the
implementation for any possible future work.

$ xdp_hw_metadata <ifname>

On the other machine:

$ echo -n xdp | nc -u -q1 <target> 9091 # for AF_XDP
$ echo -n skb | nc -u -q1 <target> 9092 # for skb

Sample output:

  # xdp
  xsk_ring_cons__peek: 1
  0x19f9090: rx_desc[0]->addr=100000000008000 addr=8100 comp_addr=8000
  rx_timestamp_supported: 1
  rx_timestamp: 1667850075063948829
  0x19f9090: complete idx=8 addr=8000

  # skb
  found skb hwtstamp = 1668314052.854274681

Decoding:
  # xdp
  rx_timestamp=1667850075.063948829

  $ date -d @1667850075
  Mon Nov  7 11:41:15 AM PST 2022
  $ date
  Mon Nov  7 11:42:05 AM PST 2022

  # skb
  $ date -d @1668314052
  Sat Nov 12 08:34:12 PM PST 2022
  $ date
  Sat Nov 12 08:37:06 PM PST 2022

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-18-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore             |   1 +
 tools/testing/selftests/bpf/Makefile               |   7 +-
 .../testing/selftests/bpf/progs/xdp_hw_metadata.c  |  81 +++++
 tools/testing/selftests/bpf/xdp_hw_metadata.c      | 403 +++++++++++++++++++++
 4 files changed, 491 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
 create mode 100644 tools/testing/selftests/bpf/xdp_hw_metadata.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 401a75844cc0..4aa5bba956ff 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -47,3 +47,4 @@ test_cpp
 xskxceiver
 xdp_redirect_multi
 xdp_synproxy
+xdp_hw_metadata
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e09bef2b7502..a554b41fe872 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -83,7 +83,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
 	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-	xskxceiver xdp_redirect_multi xdp_synproxy veristat
+	xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file
 TEST_GEN_FILES += liburandom_read.so
@@ -383,6 +383,7 @@ test_subskeleton.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib
 test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o
 test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
 xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
+xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
 
 LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
@@ -580,6 +581,10 @@ $(OUTPUT)/xskxceiver: xskxceiver.c $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
+$(OUTPUT)/xdp_hw_metadata: xdp_hw_metadata.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xdp_hw_metadata.skel.h | $(OUTPUT)
+	$(call msg,BINARY,,$@)
+	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
 # Make sure we are able to include and link libbpf against c++.
 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
new file mode 100644
index 000000000000..25b8178735ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_XSKMAP);
+	__uint(max_entries, 256);
+	__type(key, __u32);
+	__type(value, __u32);
+} xsk SEC(".maps");
+
+extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
+					 __u64 *timestamp) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
+				    __u32 *hash) __ksym;
+
+SEC("xdp")
+int rx(struct xdp_md *ctx)
+{
+	void *data, *data_meta, *data_end;
+	struct ipv6hdr *ip6h = NULL;
+	struct ethhdr *eth = NULL;
+	struct udphdr *udp = NULL;
+	struct iphdr *iph = NULL;
+	struct xdp_meta *meta;
+	int ret;
+
+	data = (void *)(long)ctx->data;
+	data_end = (void *)(long)ctx->data_end;
+	eth = data;
+	if (eth + 1 < data_end) {
+		if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+			iph = (void *)(eth + 1);
+			if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP)
+				udp = (void *)(iph + 1);
+		}
+		if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+			ip6h = (void *)(eth + 1);
+			if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP)
+				udp = (void *)(ip6h + 1);
+		}
+		if (udp && udp + 1 > data_end)
+			udp = NULL;
+	}
+
+	if (!udp)
+		return XDP_PASS;
+
+	if (udp->dest != bpf_htons(9091))
+		return XDP_PASS;
+
+	bpf_printk("forwarding UDP:9091 to AF_XDP");
+
+	ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+	if (ret != 0) {
+		bpf_printk("bpf_xdp_adjust_meta returned %d", ret);
+		return XDP_PASS;
+	}
+
+	data = (void *)(long)ctx->data;
+	data_meta = (void *)(long)ctx->data_meta;
+	meta = data_meta;
+
+	if (meta + 1 > data) {
+		bpf_printk("bpf_xdp_adjust_meta doesn't appear to work");
+		return XDP_PASS;
+	}
+
+	if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
+		bpf_printk("populated rx_timestamp with %u", meta->rx_timestamp);
+
+	if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
+		bpf_printk("populated rx_hash with %u", meta->rx_hash);
+
+	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
new file mode 100644
index 000000000000..0008f0f239e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Reference program for verifying XDP metadata on real HW. Functional test
+ * only, doesn't test the performance.
+ *
+ * RX:
+ * - UDP 9091 packets are diverted into AF_XDP
+ * - Metadata verified:
+ *   - rx_timestamp
+ *   - rx_hash
+ *
+ * TX:
+ * - TBD
+ */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "xdp_hw_metadata.skel.h"
+#include "xsk.h"
+
+#include <error.h>
+#include <linux/errqueue.h>
+#include <linux/if_link.h>
+#include <linux/net_tstamp.h>
+#include <linux/udp.h>
+#include <linux/sockios.h>
+#include <sys/mman.h>
+#include <net/if.h>
+#include <poll.h>
+
+#include "xdp_metadata.h"
+
+#define UMEM_NUM 16
+#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
+#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
+#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
+
+struct xsk {
+	void *umem_area;
+	struct xsk_umem *umem;
+	struct xsk_ring_prod fill;
+	struct xsk_ring_cons comp;
+	struct xsk_ring_prod tx;
+	struct xsk_ring_cons rx;
+	struct xsk_socket *socket;
+};
+
+struct xdp_hw_metadata *bpf_obj;
+struct xsk *rx_xsk;
+const char *ifname;
+int ifindex;
+int rxq;
+
+void test__fail(void) { /* for network_helpers.c */ }
+
+static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
+{
+	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+	const struct xsk_socket_config socket_config = {
+		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.bind_flags = XDP_COPY,
+	};
+	const struct xsk_umem_config umem_config = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
+	};
+	__u32 idx;
+	u64 addr;
+	int ret;
+	int i;
+
+	xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+	if (xsk->umem_area == MAP_FAILED)
+		return -ENOMEM;
+
+	ret = xsk_umem__create(&xsk->umem,
+			       xsk->umem_area, UMEM_SIZE,
+			       &xsk->fill,
+			       &xsk->comp,
+			       &umem_config);
+	if (ret)
+		return ret;
+
+	ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
+				 xsk->umem,
+				 &xsk->rx,
+				 &xsk->tx,
+				 &socket_config);
+	if (ret)
+		return ret;
+
+	/* First half of umem is for TX. This way address matches 1-to-1
+	 * to the completion queue index.
+	 */
+
+	for (i = 0; i < UMEM_NUM / 2; i++) {
+		addr = i * UMEM_FRAME_SIZE;
+		printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
+	}
+
+	/* Second half of umem is for RX. */
+
+	ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
+	for (i = 0; i < UMEM_NUM / 2; i++) {
+		addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
+		printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
+		*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
+	}
+	xsk_ring_prod__submit(&xsk->fill, ret);
+
+	return 0;
+}
+
+static void close_xsk(struct xsk *xsk)
+{
+	if (xsk->umem)
+		xsk_umem__delete(xsk->umem);
+	if (xsk->socket)
+		xsk_socket__delete(xsk->socket);
+	munmap(xsk->umem, UMEM_SIZE);
+}
+
+static void refill_rx(struct xsk *xsk, __u64 addr)
+{
+	__u32 idx;
+
+	if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
+		printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
+		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
+		xsk_ring_prod__submit(&xsk->fill, 1);
+	}
+}
+
+static void verify_xdp_metadata(void *data)
+{
+	struct xdp_meta *meta;
+
+	meta = data - sizeof(*meta);
+
+	printf("rx_timestamp: %llu\n", meta->rx_timestamp);
+	printf("rx_hash: %u\n", meta->rx_hash);
+}
+
+static void verify_skb_metadata(int fd)
+{
+	char cmsg_buf[1024];
+	char packet_buf[128];
+
+	struct scm_timestamping *ts;
+	struct iovec packet_iov;
+	struct cmsghdr *cmsg;
+	struct msghdr hdr;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_iov = &packet_iov;
+	hdr.msg_iovlen = 1;
+	packet_iov.iov_base = packet_buf;
+	packet_iov.iov_len = sizeof(packet_buf);
+
+	hdr.msg_control = cmsg_buf;
+	hdr.msg_controllen = sizeof(cmsg_buf);
+
+	if (recvmsg(fd, &hdr, 0) < 0)
+		error(-1, errno, "recvmsg");
+
+	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+
+		if (cmsg->cmsg_level != SOL_SOCKET)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case SCM_TIMESTAMPING:
+			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+			if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
+				printf("found skb hwtstamp = %lu.%lu\n",
+				       ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
+				return;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	printf("skb hwtstamp is not found!\n");
+}
+
+static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
+{
+	const struct xdp_desc *rx_desc;
+	struct pollfd fds[rxq + 1];
+	__u64 comp_addr;
+	__u64 addr;
+	__u32 idx;
+	int ret;
+	int i;
+
+	for (i = 0; i < rxq; i++) {
+		fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
+		fds[i].events = POLLIN;
+		fds[i].revents = 0;
+	}
+
+	fds[rxq].fd = server_fd;
+	fds[rxq].events = POLLIN;
+	fds[rxq].revents = 0;
+
+	while (true) {
+		errno = 0;
+		ret = poll(fds, rxq + 1, 1000);
+		printf("poll: %d (%d)\n", ret, errno);
+		if (ret < 0)
+			break;
+		if (ret == 0)
+			continue;
+
+		if (fds[rxq].revents)
+			verify_skb_metadata(server_fd);
+
+		for (i = 0; i < rxq; i++) {
+			if (fds[i].revents == 0)
+				continue;
+
+			struct xsk *xsk = &rx_xsk[i];
+
+			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
+			printf("xsk_ring_cons__peek: %d\n", ret);
+			if (ret != 1)
+				continue;
+
+			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
+			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
+			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
+			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
+			       xsk, idx, rx_desc->addr, addr, comp_addr);
+			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr));
+			xsk_ring_cons__release(&xsk->rx, 1);
+			refill_rx(xsk, comp_addr);
+		}
+	}
+
+	return 0;
+}
+
+struct ethtool_channels {
+	__u32	cmd;
+	__u32	max_rx;
+	__u32	max_tx;
+	__u32	max_other;
+	__u32	max_combined;
+	__u32	rx_count;
+	__u32	tx_count;
+	__u32	other_count;
+	__u32	combined_count;
+};
+
+#define ETHTOOL_GCHANNELS	0x0000003c /* Get no of channels */
+
+static int rxq_num(const char *ifname)
+{
+	struct ethtool_channels ch = {
+		.cmd = ETHTOOL_GCHANNELS,
+	};
+
+	struct ifreq ifr = {
+		.ifr_data = (void *)&ch,
+	};
+	strcpy(ifr.ifr_name, ifname);
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0)
+		error(-1, errno, "socket");
+
+	ret = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (ret < 0)
+		error(-1, errno, "socket");
+
+	close(fd);
+
+	return ch.rx_count + ch.combined_count;
+}
+
+static void cleanup(void)
+{
+	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+	int ret;
+	int i;
+
+	if (bpf_obj) {
+		opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
+		if (opts.old_prog_fd >= 0) {
+			printf("detaching bpf program....\n");
+			ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
+			if (ret)
+				printf("failed to detach XDP program: %d\n", ret);
+		}
+	}
+
+	for (i = 0; i < rxq; i++)
+		close_xsk(&rx_xsk[i]);
+
+	if (bpf_obj)
+		xdp_hw_metadata__destroy(bpf_obj);
+}
+
+static void handle_signal(int sig)
+{
+	/* interrupting poll() is all we need */
+}
+
+static void timestamping_enable(int fd, int val)
+{
+	int ret;
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
+	if (ret < 0)
+		error(-1, errno, "setsockopt(SO_TIMESTAMPING)");
+}
+
+int main(int argc, char *argv[])
+{
+	int server_fd = -1;
+	int ret;
+	int i;
+
+	struct bpf_program *prog;
+
+	if (argc != 2) {
+		fprintf(stderr, "pass device name\n");
+		return -1;
+	}
+
+	ifname = argv[1];
+	ifindex = if_nametoindex(ifname);
+	rxq = rxq_num(ifname);
+
+	printf("rxq: %d\n", rxq);
+
+	rx_xsk = malloc(sizeof(struct xsk) * rxq);
+	if (!rx_xsk)
+		error(-1, ENOMEM, "malloc");
+
+	for (i = 0; i < rxq; i++) {
+		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
+		ret = open_xsk(ifindex, &rx_xsk[i], i);
+		if (ret)
+			error(-1, -ret, "open_xsk");
+
+		printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
+	}
+
+	printf("open bpf program...\n");
+	bpf_obj = xdp_hw_metadata__open();
+	if (libbpf_get_error(bpf_obj))
+		error(-1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
+
+	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
+	bpf_program__set_ifindex(prog, ifindex);
+	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+	printf("load bpf program...\n");
+	ret = xdp_hw_metadata__load(bpf_obj);
+	if (ret)
+		error(-1, -ret, "xdp_hw_metadata__load");
+
+	printf("prepare skb endpoint...\n");
+	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
+	if (server_fd < 0)
+		error(-1, errno, "start_server");
+	timestamping_enable(server_fd,
+			    SOF_TIMESTAMPING_SOFTWARE |
+			    SOF_TIMESTAMPING_RAW_HARDWARE);
+
+	printf("prepare xsk map...\n");
+	for (i = 0; i < rxq; i++) {
+		int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
+		__u32 queue_id = i;
+
+		printf("map[%d] = %d\n", queue_id, sock_fd);
+		ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
+		if (ret)
+			error(-1, -ret, "bpf_map_update_elem");
+	}
+
+	printf("attach bpf program...\n");
+	ret = bpf_xdp_attach(ifindex,
+			     bpf_program__fd(bpf_obj->progs.rx),
+			     XDP_FLAGS, NULL);
+	if (ret)
+		error(-1, -ret, "bpf_xdp_attach");
+
+	signal(SIGINT, handle_signal);
+	ret = verify_metadata(rx_xsk, rxq, server_fd);
+	close(server_fd);
+	cleanup();
+	if (ret)
+		error(-1, -ret, "verify_metadata");
+}
-- 
cgit v1.2.3


From 7525daeefc8c20902bac63f89603096c76808fe0 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 19 Jan 2023 20:18:44 -0600
Subject: selftests/bpf: Use __failure macro in task kfunc testsuite

In commit 537c3f66eac1 ("selftests/bpf: add generic BPF program tester-loader"),
a new mechanism was added to the BPF selftest framework to allow testsuites to
use macros to define expected failing testcases.

This allows any testsuite which tests verification failure to remove a good
amount of boilerplate code. This patch updates the task_kfunc selftest suite
to use these new macros.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20230120021844.3048244-1-void@manifault.com
---
 .../testing/selftests/bpf/prog_tests/task_kfunc.c  | 71 +---------------------
 .../selftests/bpf/progs/task_kfunc_failure.c       | 18 ++++++
 2 files changed, 19 insertions(+), 70 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
index 18848c31e36f..f79fa5bc9a8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -9,9 +9,6 @@
 #include "task_kfunc_failure.skel.h"
 #include "task_kfunc_success.skel.h"
 
-static size_t log_buf_sz = 1 << 20; /* 1 MB */
-static char obj_log_buf[1048576];
-
 static struct task_kfunc_success *open_load_task_kfunc_skel(void)
 {
 	struct task_kfunc_success *skel;
@@ -83,67 +80,6 @@ static const char * const success_tests[] = {
 	"test_task_from_pid_invalid",
 };
 
-static struct {
-	const char *prog_name;
-	const char *expected_err_msg;
-} failure_tests[] = {
-	{"task_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
-	{"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"},
-	{"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
-	{"task_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
-	{"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"},
-	{"task_kfunc_acquire_unreleased", "Unreleased reference"},
-	{"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
-	{"task_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
-	{"task_kfunc_get_null", "arg#0 expected pointer to map value"},
-	{"task_kfunc_xchg_unreleased", "Unreleased reference"},
-	{"task_kfunc_get_unreleased", "Unreleased reference"},
-	{"task_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"},
-	{"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"},
-	{"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
-	{"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"},
-	{"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
-	{"task_kfunc_from_lsm_task_free", "reg type unsupported for arg#0 function"},
-};
-
-static void verify_fail(const char *prog_name, const char *expected_err_msg)
-{
-	LIBBPF_OPTS(bpf_object_open_opts, opts);
-	struct task_kfunc_failure *skel;
-	int err, i;
-
-	opts.kernel_log_buf = obj_log_buf;
-	opts.kernel_log_size = log_buf_sz;
-	opts.kernel_log_level = 1;
-
-	skel = task_kfunc_failure__open_opts(&opts);
-	if (!ASSERT_OK_PTR(skel, "task_kfunc_failure__open_opts"))
-		goto cleanup;
-
-	for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
-		struct bpf_program *prog;
-		const char *curr_name = failure_tests[i].prog_name;
-
-		prog = bpf_object__find_program_by_name(skel->obj, curr_name);
-		if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
-			goto cleanup;
-
-		bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name));
-	}
-
-	err = task_kfunc_failure__load(skel);
-	if (!ASSERT_ERR(err, "unexpected load success"))
-		goto cleanup;
-
-	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
-		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
-		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
-	}
-
-cleanup:
-	task_kfunc_failure__destroy(skel);
-}
-
 void test_task_kfunc(void)
 {
 	int i;
@@ -155,10 +91,5 @@ void test_task_kfunc(void)
 		run_success_test(success_tests[i]);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
-		if (!test__start_subtest(failure_tests[i].prog_name))
-			continue;
-
-		verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
-	}
+	RUN_TESTS(task_kfunc_failure);
 }
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index 1b47b94dbca0..e6950d6a9cf0 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -5,6 +5,7 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
 
+#include "bpf_misc.h"
 #include "task_kfunc_common.h"
 
 char _license[] SEC("license") = "GPL";
@@ -27,6 +28,7 @@ static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *ta
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("R1 must be referenced or trusted")
 int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
@@ -44,6 +46,7 @@ int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_f
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point")
 int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags;
@@ -56,6 +59,7 @@ int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
 }
 
 SEC("kretprobe/free_task")
+__failure __msg("reg type unsupported for arg#0 function")
 int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
@@ -68,6 +72,7 @@ int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("R1 must be referenced or trusted")
 int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
@@ -81,6 +86,7 @@ int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 cl
 
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point")
 int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
@@ -95,6 +101,7 @@ int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
 int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
@@ -107,6 +114,7 @@ int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 expected pointer to map value")
 int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *kptr;
@@ -122,6 +130,7 @@ int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 expected pointer to map value")
 int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *kptr, *acquired;
@@ -140,6 +149,7 @@ int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clo
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 expected pointer to map value")
 int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *kptr;
@@ -155,6 +165,7 @@ int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags)
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
 int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *kptr;
@@ -174,6 +185,7 @@ int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_fla
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
 int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *kptr;
@@ -193,6 +205,7 @@ int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flag
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket")
 int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
 {
 	struct __tasks_kfunc_map_value *v;
@@ -208,6 +221,7 @@ int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_f
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point")
 int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired = (struct task_struct *)&clone_flags;
@@ -219,6 +233,7 @@ int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
 int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
 {
 	struct __tasks_kfunc_map_value local, *v;
@@ -251,6 +266,7 @@ int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("release kernel function bpf_task_release expects")
 int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags)
 {
 	/* Cannot release trusted task pointer which was not acquired. */
@@ -260,6 +276,7 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_
 }
 
 SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
 int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
@@ -273,6 +290,7 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl
 }
 
 SEC("lsm/task_free")
+__failure __msg("reg type unsupported for arg#0 function")
 int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
 {
 	struct task_struct *acquired;
-- 
cgit v1.2.3


From bc72742bebec8e0766af1e07e13169a873f9119c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 20 Jan 2023 12:09:00 -0800
Subject: selftests/bpf: Validate arch-specific argument registers limits

Update uprobe_autoattach selftest to validate architecture-specific
argument passing through registers. Use new BPF_UPROBE and
BPF_URETPROBE, and construct both BPF-side and user-space side in such
a way that for different architectures we are fetching and checking
different number of arguments, matching architecture-specific limit of
how many registers are available for argument passing.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Alan Maguire <alan.maguire@oracle.com> # arm64
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com> # s390x
Link: https://lore.kernel.org/bpf/20230120200914.3008030-12-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/uprobe_autoattach.c   | 33 ++++++++++++---
 tools/testing/selftests/bpf/progs/bpf_misc.h       | 25 +++++++++++
 .../selftests/bpf/progs/test_uprobe_autoattach.c   | 48 +++++++++++++++++++++-
 3 files changed, 99 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
index 35b87c7ba5be..82807def0d24 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -3,18 +3,21 @@
 
 #include <test_progs.h>
 #include "test_uprobe_autoattach.skel.h"
+#include "progs/bpf_misc.h"
 
 /* uprobe attach point */
-static noinline int autoattach_trigger_func(int arg)
+static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3,
+					    int arg4, int arg5, int arg6,
+					    int arg7, int arg8)
 {
 	asm volatile ("");
-	return arg + 1;
+	return arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8 + 1;
 }
 
 void test_uprobe_autoattach(void)
 {
 	struct test_uprobe_autoattach *skel;
-	int trigger_val = 100, trigger_ret;
+	int trigger_ret;
 	size_t malloc_sz = 1;
 	char *mem;
 
@@ -28,22 +31,42 @@ void test_uprobe_autoattach(void)
 	skel->bss->test_pid = getpid();
 
 	/* trigger & validate uprobe & uretprobe */
-	trigger_ret = autoattach_trigger_func(trigger_val);
+	trigger_ret = autoattach_trigger_func(1, 2, 3, 4, 5, 6, 7, 8);
 
 	skel->bss->test_pid = getpid();
 
 	/* trigger & validate shared library u[ret]probes attached by name */
 	mem = malloc(malloc_sz);
 
-	ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
+	ASSERT_EQ(skel->bss->uprobe_byname_parm1, 1, "check_uprobe_byname_parm1");
 	ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
 	ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
+	ASSERT_EQ(skel->bss->uretprobe_byname_ret, trigger_ret, "check_uretprobe_byname_ret");
 	ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
 	ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1");
 	ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
 	ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
 	ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
 
+	ASSERT_EQ(skel->bss->a[0], 1, "arg1");
+	ASSERT_EQ(skel->bss->a[1], 2, "arg2");
+	ASSERT_EQ(skel->bss->a[2], 3, "arg3");
+#if FUNC_REG_ARG_CNT > 3
+	ASSERT_EQ(skel->bss->a[3], 4, "arg4");
+#endif
+#if FUNC_REG_ARG_CNT > 4
+	ASSERT_EQ(skel->bss->a[4], 5, "arg5");
+#endif
+#if FUNC_REG_ARG_CNT > 5
+	ASSERT_EQ(skel->bss->a[5], 6, "arg6");
+#endif
+#if FUNC_REG_ARG_CNT > 6
+	ASSERT_EQ(skel->bss->a[6], 7, "arg7");
+#endif
+#if FUNC_REG_ARG_CNT > 7
+	ASSERT_EQ(skel->bss->a[7], 8, "arg8");
+#endif
+
 	free(mem);
 cleanup:
 	test_uprobe_autoattach__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 2d7b89b447b2..14e28f991451 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -28,4 +28,29 @@
 #define SYS_PREFIX "__se_"
 #endif
 
+/* How many arguments are passed to function in register */
+#if defined(__TARGET_ARCH_x86) || defined(__x86_64__)
+#define FUNC_REG_ARG_CNT 6
+#elif defined(__i386__)
+#define FUNC_REG_ARG_CNT 3
+#elif defined(__TARGET_ARCH_s390) || defined(__s390x__)
+#define FUNC_REG_ARG_CNT 5
+#elif defined(__TARGET_ARCH_arm) || defined(__arm__)
+#define FUNC_REG_ARG_CNT 4
+#elif defined(__TARGET_ARCH_arm64) || defined(__aarch64__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_mips) || defined(__mips__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_powerpc) || defined(__powerpc__) || defined(__powerpc64__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_sparc) || defined(__sparc__)
+#define FUNC_REG_ARG_CNT 6
+#elif defined(__TARGET_ARCH_riscv) || defined(__riscv__)
+#define FUNC_REG_ARG_CNT 8
+#else
+/* default to 5 for others */
+#define FUNC_REG_ARG_CNT 5
+#endif
+
+
 #endif
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
index ab75522e2eeb..774ddeb45898 100644
--- a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -6,10 +6,12 @@
 #include <bpf/bpf_core_read.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 int uprobe_byname_parm1 = 0;
 int uprobe_byname_ran = 0;
 int uretprobe_byname_rc = 0;
+int uretprobe_byname_ret = 0;
 int uretprobe_byname_ran = 0;
 size_t uprobe_byname2_parm1 = 0;
 int uprobe_byname2_ran = 0;
@@ -18,6 +20,8 @@ int uretprobe_byname2_ran = 0;
 
 int test_pid;
 
+int a[8];
+
 /* This program cannot auto-attach, but that should not stop other
  * programs from attaching.
  */
@@ -28,18 +32,58 @@ int handle_uprobe_noautoattach(struct pt_regs *ctx)
 }
 
 SEC("uprobe//proc/self/exe:autoattach_trigger_func")
-int handle_uprobe_byname(struct pt_regs *ctx)
+int BPF_UPROBE(handle_uprobe_byname
+	       , int arg1
+	       , int arg2
+	       , int arg3
+#if FUNC_REG_ARG_CNT > 3
+	       , int arg4
+#endif
+#if FUNC_REG_ARG_CNT > 4
+	       , int arg5
+#endif
+#if FUNC_REG_ARG_CNT > 5
+	       , int arg6
+#endif
+#if FUNC_REG_ARG_CNT > 6
+	       , int arg7
+#endif
+#if FUNC_REG_ARG_CNT > 7
+	       , int arg8
+#endif
+)
 {
 	uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx);
 	uprobe_byname_ran = 1;
+
+	a[0] = arg1;
+	a[1] = arg2;
+	a[2] = arg3;
+#if FUNC_REG_ARG_CNT > 3
+	a[3] = arg4;
+#endif
+#if FUNC_REG_ARG_CNT > 4
+	a[4] = arg5;
+#endif
+#if FUNC_REG_ARG_CNT > 5
+	a[5] = arg6;
+#endif
+#if FUNC_REG_ARG_CNT > 6
+	a[6] = arg7;
+#endif
+#if FUNC_REG_ARG_CNT > 7
+	a[7] = arg8;
+#endif
 	return 0;
 }
 
 SEC("uretprobe//proc/self/exe:autoattach_trigger_func")
-int handle_uretprobe_byname(struct pt_regs *ctx)
+int BPF_URETPROBE(handle_uretprobe_byname, int ret)
 {
 	uretprobe_byname_rc = PT_REGS_RC_CORE(ctx);
+	uretprobe_byname_ret = ret;
 	uretprobe_byname_ran = 2;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 92dc5cdfc1131befa061cab8ed772c7bf8bfb2db Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 20 Jan 2023 12:09:13 -0800
Subject: selftests/bpf: Add 6-argument syscall tracing test

Turns out splice() is one of the syscalls that's using current maximum
number of arguments (six). This is perfect for testing, so extend
bpf_syscall_macro selftest to also trace splice() syscall, using
BPF_KSYSCALL() macro. This makes sure all the syscall argument register
definitions are correct.

Suggested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Alan Maguire <alan.maguire@oracle.com> # arm64
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com> # s390x
Link: https://lore.kernel.org/bpf/20230120200914.3008030-25-andrii@kernel.org
---
 .../bpf/prog_tests/test_bpf_syscall_macro.c        | 17 ++++++++++++++
 .../selftests/bpf/progs/bpf_syscall_macro.c        | 26 ++++++++++++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
index c381faaae741..2900c5e9a016 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright 2022 Sony Group Corporation */
+#define _GNU_SOURCE
+#include <fcntl.h>
 #include <sys/prctl.h>
 #include <test_progs.h>
 #include "bpf_syscall_macro.skel.h"
@@ -13,6 +15,8 @@ void test_bpf_syscall_macro(void)
 	unsigned long exp_arg3 = 13;
 	unsigned long exp_arg4 = 14;
 	unsigned long exp_arg5 = 15;
+	loff_t off_in, off_out;
+	ssize_t r;
 
 	/* check whether it can open program */
 	skel = bpf_syscall_macro__open();
@@ -33,6 +37,7 @@ void test_bpf_syscall_macro(void)
 
 	/* check whether args of syscall are copied correctly */
 	prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5);
+
 #if defined(__aarch64__) || defined(__s390__)
 	ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
 #else
@@ -68,6 +73,18 @@ void test_bpf_syscall_macro(void)
 	ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4");
 	ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5");
 
+	r = splice(-42, &off_in, 42, &off_out, 0x12340000, SPLICE_F_NONBLOCK);
+	err = -errno;
+	ASSERT_EQ(r, -1, "splice_res");
+	ASSERT_EQ(err, -EBADF, "splice_err");
+
+	ASSERT_EQ(skel->bss->splice_fd_in, -42, "splice_arg1");
+	ASSERT_EQ(skel->bss->splice_off_in, (__u64)&off_in, "splice_arg2");
+	ASSERT_EQ(skel->bss->splice_fd_out, 42, "splice_arg3");
+	ASSERT_EQ(skel->bss->splice_off_out, (__u64)&off_out, "splice_arg4");
+	ASSERT_EQ(skel->bss->splice_len, 0x12340000, "splice_arg5");
+	ASSERT_EQ(skel->bss->splice_flags, SPLICE_F_NONBLOCK, "splice_arg6");
+
 cleanup:
 	bpf_syscall_macro__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
index e1e11897e99b..1a476d8ed354 100644
--- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -81,4 +81,30 @@ int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2,
 	return 0;
 }
 
+__u64 splice_fd_in;
+__u64 splice_off_in;
+__u64 splice_fd_out;
+__u64 splice_off_out;
+__u64 splice_len;
+__u64 splice_flags;
+
+SEC("ksyscall/splice")
+int BPF_KSYSCALL(splice_enter, int fd_in, loff_t *off_in, int fd_out,
+		 loff_t *off_out, size_t len, unsigned int flags)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid != filter_pid)
+		return 0;
+
+	splice_fd_in = fd_in;
+	splice_off_in = (__u64)off_in;
+	splice_fd_out = fd_out;
+	splice_off_out = (__u64)off_out;
+	splice_len = len;
+	splice_flags = flags;
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 057fb03160a88925877548979ab4ab7f9223e118 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 20 Jan 2023 18:11:36 +0000
Subject: selftests: net: tcp_mmap: populate pages in send path

In commit 72653ae5303c ("selftests: net: tcp_mmap:
Use huge pages in send path") I made a change to use hugepages
for the buffer used by the client (tx path)

Today, I understood that the cause for poor zerocopy
performance was that after a mmap() for a 512KB memory
zone, kernel uses a single zeropage, mapped 128 times.

This was really the reason for poor tx path performance
in zero copy mode, because this zero page refcount is
under high pressure, especially when TCP ACK packets
are processed on another cpu.

We need either to force a COW on all the memory range,
or use MAP_POPULATE so that a zero page is not abused.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230120181136.3764521-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/tcp_mmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 00f837c9bc6c..46a02bbd31d0 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -137,7 +137,8 @@ static void *mmap_large_buffer(size_t need, size_t *allocated)
 	if (buffer == (void *)-1) {
 		sz = need;
 		buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
-			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+			      MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
+			      -1, 0);
 		if (buffer != (void *)-1)
 			fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
 	}
-- 
cgit v1.2.3


From ca22da2fbd693b54dc8e3b7b54ccc9f7e9ba3640 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 20 Jan 2023 18:01:40 +0100
Subject: act_mirred: use the backlog for nested calls to mirred ingress

William reports kernel soft-lockups on some OVS topologies when TC mirred
egress->ingress action is hit by local TCP traffic [1].
The same can also be reproduced with SCTP (thanks Xin for verifying), when
client and server reach themselves through mirred egress to ingress, and
one of the two peers sends a "heartbeat" packet (from within a timer).

Enqueueing to backlog proved to fix this soft lockup; however, as Cong
noticed [2], we should preserve - when possible - the current mirred
behavior that counts as "overlimits" any eventual packet drop subsequent to
the mirred forwarding action [3]. A compromise solution might use the
backlog only when tcf_mirred_act() has a nest level greater than one:
change tcf_mirred_forward() accordingly.

Also, add a kselftest that can reproduce the lockup and verifies TC mirred
ability to account for further packet drops after TC mirred egress->ingress
(when the nest level is 1).

 [1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
 [2] https://lore.kernel.org/netdev/Y0w%2FWWY60gqrtGLp@pop-os.localdomain/
 [3] such behavior is not guaranteed: for example, if RPS or skb RX
     timestamping is enabled on the mirred target device, the kernel
     can defer receiving the skb and return NET_RX_SUCCESS inside
     tcf_mirred_forward().

Reported-by: William Zhao <wizhao@redhat.com>
CC: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/act_mirred.c                             |  7 ++++
 .../testing/selftests/net/forwarding/tc_actions.sh | 49 +++++++++++++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c8abb5136491..8037ec9b1d31 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -206,12 +206,19 @@ release_idr:
 	return err;
 }
 
+static bool is_mirred_nested(void)
+{
+	return unlikely(__this_cpu_read(mirred_nest_level) > 1);
+}
+
 static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
 {
 	int err;
 
 	if (!want_ingress)
 		err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
+	else if (is_mirred_nested())
+		err = netif_rx(skb);
 	else
 		err = netif_receive_skb(skb);
 
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 1e0a62f638fe..919c0dd9fe4b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -3,7 +3,8 @@
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
 	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
-	gact_trap_test mirred_egress_to_ingress_test"
+	gact_trap_test mirred_egress_to_ingress_test \
+	mirred_egress_to_ingress_tcp_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -198,6 +199,52 @@ mirred_egress_to_ingress_test()
 	log_test "mirred_egress_to_ingress ($tcflags)"
 }
 
+mirred_egress_to_ingress_tcp_test()
+{
+	local tmpfile=$(mktemp) tmpfile1=$(mktemp)
+
+	RET=0
+	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+		$tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
+			action ct commit nat src addr 192.0.2.2 pipe \
+			action ct clear pipe \
+			action ct commit nat dst addr 192.0.2.1 pipe \
+			action ct clear pipe \
+			action skbedit ptype host pipe \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
+		$tcflags ip_proto icmp \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
+		ip_proto icmp \
+			action drop
+
+	ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1  &
+	local rpid=$!
+	ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile
+	wait -n $rpid
+	cmp -s $tmpfile $tmpfile1
+	check_err $? "server output check failed"
+
+	$MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
+		-t icmp "ping,id=42,seq=5" -q
+	tc_check_packets "dev $h1 egress" 101 10
+	check_err $? "didn't mirred redirect ICMP"
+	tc_check_packets "dev $h1 ingress" 102 10
+	check_err $? "didn't drop mirred ICMP"
+	local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
+	test ${overlimits} = 10
+	check_err $? "wrong overlimits, expected 10 got ${overlimits}"
+
+	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+	tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
+	tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
+
+	rm -f $tmpfile $tmpfile1
+	log_test "mirred_egress_to_ingress_tcp ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
cgit v1.2.3


From caf713c338bd95bf9ac003d8985d2c4e46d452dd Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 25 Jan 2023 08:38:10 -0600
Subject: bpf: Disallow NULLable pointers for trusted kfuncs

KF_TRUSTED_ARGS kfuncs currently have a subtle and insidious bug in
validating pointers to scalars. Say that you have a kfunc like the
following, which takes an array as the first argument:

bool bpf_cpumask_empty(const struct cpumask *cpumask)
{
	return cpumask_empty(cpumask);
}

...
BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
...

If a BPF program were to invoke the kfunc with a NULL argument, it would
crash the kernel. The reason is that struct cpumask is defined as a
bitmap, which is itself defined as an array, and is accessed as a memory
address by bitmap operations. So when the verifier analyzes the
register, it interprets it as a pointer to a scalar struct, which is an
array of size 8. check_mem_reg() then sees that the register is NULL and
returns 0, and the kfunc crashes when it passes it down to the cpumask
wrappers.

To fix this, this patch adds a check for KF_ARG_PTR_TO_MEM which
verifies that the register doesn't contain a possibly-NULL pointer if
the kfunc is KF_TRUSTED_ARGS.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230125143816.721952-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                                  | 6 ++++++
 tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c    | 4 ++--
 tools/testing/selftests/bpf/progs/task_kfunc_failure.c | 4 ++--
 3 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 66ec577fcb8b..bb38b01b738f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9194,6 +9194,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			return -EINVAL;
 		}
 
+		if (is_kfunc_trusted_args(meta) &&
+		    (register_is_null(reg) || type_may_be_null(reg->type))) {
+			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
+			return -EACCES;
+		}
+
 		if (reg->ref_obj_id) {
 			if (is_kfunc_release(meta) && meta->ref_obj_id) {
 				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index 973f0c5af965..f3bb0e16e088 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -93,11 +93,11 @@ static struct {
 	const char *prog_name;
 	const char *expected_err_msg;
 } failure_tests[] = {
-	{"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
+	{"cgrp_kfunc_acquire_untrusted", "Possibly NULL pointer passed to trusted arg0"},
 	{"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"},
 	{"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
 	{"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
-	{"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup must point"},
+	{"cgrp_kfunc_acquire_null", "Possibly NULL pointer passed to trusted arg0"},
 	{"cgrp_kfunc_acquire_unreleased", "Unreleased reference"},
 	{"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
 	{"cgrp_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index e6950d6a9cf0..f19d54eda4f1 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -28,7 +28,7 @@ static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *ta
 }
 
 SEC("tp_btf/task_newtask")
-__failure __msg("R1 must be referenced or trusted")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
@@ -86,7 +86,7 @@ int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 cl
 
 
 SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 pointer type STRUCT task_struct must point")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
-- 
cgit v1.2.3


From a6541f4d280465e3dff08a45734c0d4ac3f363a4 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 25 Jan 2023 08:38:12 -0600
Subject: selftests/bpf: Add nested trust selftests suite

Now that defining trusted fields in a struct is supported, we should add
selftests to verify the behavior. This patch adds a few such testcases.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230125143816.721952-4-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |  1 +
 .../selftests/bpf/prog_tests/nested_trust.c        | 12 ++++++++
 .../selftests/bpf/progs/nested_trust_common.h      | 12 ++++++++
 .../selftests/bpf/progs/nested_trust_failure.c     | 33 ++++++++++++++++++++++
 .../selftests/bpf/progs/nested_trust_success.c     | 19 +++++++++++++
 5 files changed, 77 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/nested_trust.c
 create mode 100644 tools/testing/selftests/bpf/progs/nested_trust_common.h
 create mode 100644 tools/testing/selftests/bpf/progs/nested_trust_failure.c
 create mode 100644 tools/testing/selftests/bpf/progs/nested_trust_success.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index b9ef9cc61d36..f18e367ca4b9 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -44,6 +44,7 @@ map_kptr                                 # failed to open_and_load program: -524
 modify_return                            # modify_return attach failed: -524                                           (trampoline)
 module_attach                            # skel_attach skeleton attach failed: -524                                    (trampoline)
 mptcp
+nested_trust                             # JIT does not support calling kernel function
 netcnt                                   # failed to load BPF skeleton 'netcnt_prog': -7                               (?)
 probe_user                               # check_kprobe_res wrong kprobe res from probe read                           (?)
 rcu_read_lock                            # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3                (?)
diff --git a/tools/testing/selftests/bpf/prog_tests/nested_trust.c b/tools/testing/selftests/bpf/prog_tests/nested_trust.c
new file mode 100644
index 000000000000..39886f58924e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/nested_trust.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "nested_trust_failure.skel.h"
+#include "nested_trust_success.skel.h"
+
+void test_nested_trust(void)
+{
+	RUN_TESTS(nested_trust_success);
+	RUN_TESTS(nested_trust_failure);
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h
new file mode 100644
index 000000000000..83d33931136e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _NESTED_TRUST_COMMON_H
+#define _NESTED_TRUST_COMMON_H
+
+#include <stdbool.h>
+
+bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym;
+bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+
+#endif /* _NESTED_TRUST_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
new file mode 100644
index 000000000000..14aff7676436
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "nested_trust_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ *         TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R2 must be referenced or trusted")
+int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags)
+{
+	bpf_cpumask_test_cpu(0, task->user_cpus_ptr);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R1 must have zero offset when passed to release func or trusted arg to kfunc")
+int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_flags)
+{
+	bpf_cpumask_first_zero(&task->cpus_mask);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c
new file mode 100644
index 000000000000..886ade4aa99d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "nested_trust_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tp_btf/task_newtask")
+__success
+int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags)
+{
+	bpf_cpumask_test_cpu(0, task->cpus_ptr);
+	return 0;
+}
-- 
cgit v1.2.3


From 7b6abcfa15cd18de21e5bfb952df57268af2f041 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 25 Jan 2023 08:38:13 -0600
Subject: selftests/bpf: Add selftest suite for cpumask kfuncs

A recent patch added a new set of kfuncs for allocating, freeing,
manipulating, and querying cpumasks. This patch adds a new 'cpumask'
selftest suite which verifies their behavior.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230125143816.721952-5-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 tools/testing/selftests/bpf/prog_tests/cpumask.c   |  74 ++++
 tools/testing/selftests/bpf/progs/cpumask_common.h | 114 ++++++
 .../testing/selftests/bpf/progs/cpumask_failure.c  | 126 ++++++
 .../testing/selftests/bpf/progs/cpumask_success.c  | 426 +++++++++++++++++++++
 5 files changed, 741 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cpumask.c
 create mode 100644 tools/testing/selftests/bpf/progs/cpumask_common.h
 create mode 100644 tools/testing/selftests/bpf/progs/cpumask_failure.c
 create mode 100644 tools/testing/selftests/bpf/progs/cpumask_success.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index f18e367ca4b9..50924611e5bb 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -13,6 +13,7 @@ cgroup_hierarchical_stats                # JIT does not support calling kernel f
 cgrp_kfunc                               # JIT does not support calling kernel function
 cgrp_local_storage                       # prog_attach unexpected error: -524                                          (trampoline)
 core_read_macros                         # unknown func bpf_probe_read#4                                               (overlapping)
+cpumask                                  # JIT does not support calling kernel function
 d_path                                   # failed to auto-attach program 'prog_stat': -524                             (trampoline)
 decap_sanity                             # JIT does not support calling kernel function                                (kfunc)
 deny_namespace                           # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
new file mode 100644
index 000000000000..5fbe457c4ebe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "cpumask_failure.skel.h"
+#include "cpumask_success.skel.h"
+
+static const char * const cpumask_success_testcases[] = {
+	"test_alloc_free_cpumask",
+	"test_set_clear_cpu",
+	"test_setall_clear_cpu",
+	"test_first_firstzero_cpu",
+	"test_test_and_set_clear",
+	"test_and_or_xor",
+	"test_intersects_subset",
+	"test_copy_any_anyand",
+	"test_insert_leave",
+	"test_insert_remove_release",
+	"test_insert_kptr_get_release",
+};
+
+static void verify_success(const char *prog_name)
+{
+	struct cpumask_success *skel;
+	struct bpf_program *prog;
+	struct bpf_link *link = NULL;
+	pid_t child_pid;
+	int status;
+
+	skel = cpumask_success__open();
+	if (!ASSERT_OK_PTR(skel, "cpumask_success__open"))
+		return;
+
+	skel->bss->pid = getpid();
+	skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+	cpumask_success__load(skel);
+	if (!ASSERT_OK_PTR(skel, "cpumask_success__load"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	link = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+		goto cleanup;
+
+	child_pid = fork();
+	if (!ASSERT_GT(child_pid, -1, "child_pid"))
+		goto cleanup;
+	if (child_pid == 0)
+		_exit(0);
+	waitpid(child_pid, &status, 0);
+	ASSERT_OK(skel->bss->err, "post_wait_err");
+
+cleanup:
+	bpf_link__destroy(link);
+	cpumask_success__destroy(skel);
+}
+
+void test_cpumask(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cpumask_success_testcases); i++) {
+		if (!test__start_subtest(cpumask_success_testcases[i]))
+			continue;
+
+		verify_success(cpumask_success_testcases[i]);
+	}
+
+	RUN_TESTS(cpumask_failure);
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
new file mode 100644
index 000000000000..ad34f3b602be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CPUMASK_COMMON_H
+#define _CPUMASK_COMMON_H
+
+#include "errno.h"
+#include <stdbool.h>
+
+int err;
+
+struct __cpumask_map_value {
+	struct bpf_cpumask __kptr_ref * cpumask;
+};
+
+struct array_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct __cpumask_map_value);
+	__uint(max_entries, 1);
+} __cpumask_map SEC(".maps");
+
+struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
+void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
+struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
+struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym;
+u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
+u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
+void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
+bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
+bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
+bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
+void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym;
+void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym;
+bool bpf_cpumask_and(struct bpf_cpumask *cpumask,
+		     const struct cpumask *src1,
+		     const struct cpumask *src2) __ksym;
+void bpf_cpumask_or(struct bpf_cpumask *cpumask,
+		    const struct cpumask *src1,
+		    const struct cpumask *src2) __ksym;
+void bpf_cpumask_xor(struct bpf_cpumask *cpumask,
+		     const struct cpumask *src1,
+		     const struct cpumask *src2) __ksym;
+bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym;
+bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym;
+bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym;
+bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;
+bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
+void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
+u32 bpf_cpumask_any(const struct cpumask *src) __ksym;
+u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym;
+
+static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
+{
+	return (const struct cpumask *)cpumask;
+}
+
+static inline struct bpf_cpumask *create_cpumask(void)
+{
+	struct bpf_cpumask *cpumask;
+
+	cpumask = bpf_cpumask_create();
+	if (!cpumask) {
+		err = 1;
+		return NULL;
+	}
+
+	if (!bpf_cpumask_empty(cast(cpumask))) {
+		err = 2;
+		bpf_cpumask_release(cpumask);
+		return NULL;
+	}
+
+	return cpumask;
+}
+
+static inline struct __cpumask_map_value *cpumask_map_value_lookup(void)
+{
+	u32 key = 0;
+
+	return bpf_map_lookup_elem(&__cpumask_map, &key);
+}
+
+static inline int cpumask_map_insert(struct bpf_cpumask *mask)
+{
+	struct __cpumask_map_value local, *v;
+	long status;
+	struct bpf_cpumask *old;
+	u32 key = 0;
+
+	local.cpumask = NULL;
+	status = bpf_map_update_elem(&__cpumask_map, &key, &local, 0);
+	if (status) {
+		bpf_cpumask_release(mask);
+		return status;
+	}
+
+	v = bpf_map_lookup_elem(&__cpumask_map, &key);
+	if (!v) {
+		bpf_cpumask_release(mask);
+		return -ENOENT;
+	}
+
+	old = bpf_kptr_xchg(&v->cpumask, mask);
+	if (old) {
+		bpf_cpumask_release(old);
+		return -EEXIST;
+	}
+
+	return 0;
+}
+
+#endif /* _CPUMASK_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
new file mode 100644
index 000000000000..33e8e86dd090
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "cpumask_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ *         TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	cpumask = create_cpumask();
+
+	/* cpumask is never released. */
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg0")
+int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	cpumask = create_cpumask();
+
+	/* cpumask is released twice. */
+	bpf_cpumask_release(cpumask);
+	bpf_cpumask_release(cpumask);
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask")
+int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	/* Can't acquire a non-struct bpf_cpumask. */
+	cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr);
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask")
+int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	/* Can't set the CPU of a non-struct bpf_cpumask. */
+	bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr);
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+	struct __cpumask_map_value *v;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	if (cpumask_map_insert(cpumask))
+		return 0;
+
+	v = cpumask_map_value_lookup();
+	if (!v)
+		return 0;
+
+	cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
+
+	/* cpumask is never released. */
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+	struct __cpumask_map_value *v;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	if (cpumask_map_insert(cpumask))
+		return 0;
+
+	v = cpumask_map_value_lookup();
+	if (!v)
+		return 0;
+
+	cpumask = bpf_cpumask_kptr_get(&v->cpumask);
+
+	/* cpumask is never released. */
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg0")
+int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
+{
+  /* NULL passed to KF_TRUSTED_ARGS kfunc. */
+	bpf_cpumask_empty(NULL);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
new file mode 100644
index 000000000000..1d38bc65d4b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "cpumask_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid, nr_cpus;
+
+static bool is_test_task(void)
+{
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+	return pid == cur_pid;
+}
+
+static bool create_cpumask_set(struct bpf_cpumask **out1,
+			       struct bpf_cpumask **out2,
+			       struct bpf_cpumask **out3,
+			       struct bpf_cpumask **out4)
+{
+	struct bpf_cpumask *mask1, *mask2, *mask3, *mask4;
+
+	mask1 = create_cpumask();
+	if (!mask1)
+		return false;
+
+	mask2 = create_cpumask();
+	if (!mask2) {
+		bpf_cpumask_release(mask1);
+		err = 3;
+		return false;
+	}
+
+	mask3 = create_cpumask();
+	if (!mask3) {
+		bpf_cpumask_release(mask1);
+		bpf_cpumask_release(mask2);
+		err = 4;
+		return false;
+	}
+
+	mask4 = create_cpumask();
+	if (!mask4) {
+		bpf_cpumask_release(mask1);
+		bpf_cpumask_release(mask2);
+		bpf_cpumask_release(mask3);
+		err = 5;
+		return false;
+	}
+
+	*out1 = mask1;
+	*out2 = mask2;
+	*out3 = mask3;
+	*out4 = mask4;
+
+	return true;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_alloc_free_cpumask, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	if (!is_test_task())
+		return 0;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	bpf_cpumask_release(cpumask);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_set_clear_cpu, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	if (!is_test_task())
+		return 0;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	bpf_cpumask_set_cpu(0, cpumask);
+	if (!bpf_cpumask_test_cpu(0, cast(cpumask))) {
+		err = 3;
+		goto release_exit;
+	}
+
+	bpf_cpumask_clear_cpu(0, cpumask);
+	if (bpf_cpumask_test_cpu(0, cast(cpumask))) {
+		err = 4;
+		goto release_exit;
+	}
+
+release_exit:
+	bpf_cpumask_release(cpumask);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_setall_clear_cpu, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	if (!is_test_task())
+		return 0;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	bpf_cpumask_setall(cpumask);
+	if (!bpf_cpumask_full(cast(cpumask))) {
+		err = 3;
+		goto release_exit;
+	}
+
+	bpf_cpumask_clear(cpumask);
+	if (!bpf_cpumask_empty(cast(cpumask))) {
+		err = 4;
+		goto release_exit;
+	}
+
+release_exit:
+	bpf_cpumask_release(cpumask);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_first_firstzero_cpu, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	if (!is_test_task())
+		return 0;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	if (bpf_cpumask_first(cast(cpumask)) < nr_cpus) {
+		err = 3;
+		goto release_exit;
+	}
+
+	if (bpf_cpumask_first_zero(cast(cpumask)) != 0) {
+		bpf_printk("first zero: %d", bpf_cpumask_first_zero(cast(cpumask)));
+		err = 4;
+		goto release_exit;
+	}
+
+	bpf_cpumask_set_cpu(0, cpumask);
+	if (bpf_cpumask_first(cast(cpumask)) != 0) {
+		err = 5;
+		goto release_exit;
+	}
+
+	if (bpf_cpumask_first_zero(cast(cpumask)) != 1) {
+		err = 6;
+		goto release_exit;
+	}
+
+release_exit:
+	bpf_cpumask_release(cpumask);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+
+	if (!is_test_task())
+		return 0;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	if (bpf_cpumask_test_and_set_cpu(0, cpumask)) {
+		err = 3;
+		goto release_exit;
+	}
+
+	if (!bpf_cpumask_test_and_set_cpu(0, cpumask)) {
+		err = 4;
+		goto release_exit;
+	}
+
+	if (!bpf_cpumask_test_and_clear_cpu(0, cpumask)) {
+		err = 5;
+		goto release_exit;
+	}
+
+release_exit:
+	bpf_cpumask_release(cpumask);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_and_or_xor, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+
+	if (!is_test_task())
+		return 0;
+
+	if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+		return 0;
+
+	bpf_cpumask_set_cpu(0, mask1);
+	bpf_cpumask_set_cpu(1, mask2);
+
+	if (bpf_cpumask_and(dst1, cast(mask1), cast(mask2))) {
+		err = 6;
+		goto release_exit;
+	}
+	if (!bpf_cpumask_empty(cast(dst1))) {
+		err = 7;
+		goto release_exit;
+	}
+
+	bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+	if (!bpf_cpumask_test_cpu(0, cast(dst1))) {
+		err = 8;
+		goto release_exit;
+	}
+	if (!bpf_cpumask_test_cpu(1, cast(dst1))) {
+		err = 9;
+		goto release_exit;
+	}
+
+	bpf_cpumask_xor(dst2, cast(mask1), cast(mask2));
+	if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
+		err = 10;
+		goto release_exit;
+	}
+
+release_exit:
+	bpf_cpumask_release(mask1);
+	bpf_cpumask_release(mask2);
+	bpf_cpumask_release(dst1);
+	bpf_cpumask_release(dst2);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_intersects_subset, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+
+	if (!is_test_task())
+		return 0;
+
+	if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+		return 0;
+
+	bpf_cpumask_set_cpu(0, mask1);
+	bpf_cpumask_set_cpu(1, mask2);
+	if (bpf_cpumask_intersects(cast(mask1), cast(mask2))) {
+		err = 6;
+		goto release_exit;
+	}
+
+	bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+	if (!bpf_cpumask_subset(cast(mask1), cast(dst1))) {
+		err = 7;
+		goto release_exit;
+	}
+
+	if (!bpf_cpumask_subset(cast(mask2), cast(dst1))) {
+		err = 8;
+		goto release_exit;
+	}
+
+	if (bpf_cpumask_subset(cast(dst1), cast(mask1))) {
+		err = 9;
+		goto release_exit;
+	}
+
+release_exit:
+	bpf_cpumask_release(mask1);
+	bpf_cpumask_release(mask2);
+	bpf_cpumask_release(dst1);
+	bpf_cpumask_release(dst2);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+	u32 cpu;
+
+	if (!is_test_task())
+		return 0;
+
+	if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+		return 0;
+
+	bpf_cpumask_set_cpu(0, mask1);
+	bpf_cpumask_set_cpu(1, mask2);
+	bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+
+	cpu = bpf_cpumask_any(cast(mask1));
+	if (cpu != 0) {
+		err = 6;
+		goto release_exit;
+	}
+
+	cpu = bpf_cpumask_any(cast(dst2));
+	if (cpu < nr_cpus) {
+		err = 7;
+		goto release_exit;
+	}
+
+	bpf_cpumask_copy(dst2, cast(dst1));
+	if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
+		err = 8;
+		goto release_exit;
+	}
+
+	cpu = bpf_cpumask_any(cast(dst2));
+	if (cpu > 1) {
+		err = 9;
+		goto release_exit;
+	}
+
+	cpu = bpf_cpumask_any_and(cast(mask1), cast(mask2));
+	if (cpu < nr_cpus) {
+		err = 10;
+		goto release_exit;
+	}
+
+release_exit:
+	bpf_cpumask_release(mask1);
+	bpf_cpumask_release(mask2);
+	bpf_cpumask_release(dst1);
+	bpf_cpumask_release(dst2);
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+	struct __cpumask_map_value *v;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	if (cpumask_map_insert(cpumask))
+		err = 3;
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+	struct __cpumask_map_value *v;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	if (cpumask_map_insert(cpumask)) {
+		err = 3;
+		return 0;
+	}
+
+	v = cpumask_map_value_lookup();
+	if (!v) {
+		err = 4;
+		return 0;
+	}
+
+	cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
+	if (cpumask)
+		bpf_cpumask_release(cpumask);
+	else
+		err = 5;
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *cpumask;
+	struct __cpumask_map_value *v;
+
+	cpumask = create_cpumask();
+	if (!cpumask)
+		return 0;
+
+	if (cpumask_map_insert(cpumask)) {
+		err = 3;
+		return 0;
+	}
+
+	v = cpumask_map_value_lookup();
+	if (!v) {
+		err = 4;
+		return 0;
+	}
+
+	cpumask = bpf_cpumask_kptr_get(&v->cpumask);
+	if (cpumask)
+		bpf_cpumask_release(cpumask);
+	else
+		err = 5;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 2514a31241e1e9067d379e0fbdb60e4bc2bf4659 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Wed, 25 Jan 2023 19:04:40 +0900
Subject: selftests/bpf: Fix vmtest static compilation error

As stated in README.rst, in order to resolve errors with linker errors,
'LDLIBS=-static' should be used. Most problems will be solved by this
option, but in the case of urandom_read, this won't fix the problem. So
the Makefile is currently implemented to strip the 'static' option when
compiling the urandom_read. However, stripping this static option isn't
configured properly on $(LDLIBS) correctly, which is now causing errors
on static compilation.

    # LDLIBS=-static ./vmtest.sh
    ld.lld: error: attempted static link of dynamic object liburandom_read.so
    clang: error: linker command failed with exit code 1 (use -v to see invocation)
    make: *** [Makefile:190: /linux/tools/testing/selftests/bpf/urandom_read] Error 1
    make: *** Waiting for unfinished jobs....

This commit fixes this problem by configuring the strip with $(LDLIBS).

Fixes: 68084a136420 ("selftests/bpf: Fix building bpf selftests statically")
Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230125100440.21734-1-danieltimlee@gmail.com
---
 tools/testing/selftests/bpf/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a554b41fe872..53eae7be8dff 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -181,14 +181,15 @@ endif
 # do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
 $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
 	$(call msg,LIB,,$@)
-	$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS)   \
+	$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS))   \
+		     $^ $(filter-out -static,$(LDLIBS))	     \
 		     -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
 		     -fPIC -shared -o $@
 
 $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
 	$(call msg,BINARY,,$@)
 	$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
-		     liburandom_read.so $(LDLIBS)			       \
+		     liburandom_read.so $(filter-out -static,$(LDLIBS))	     \
 		     -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
 		     -Wl,-rpath=. -o $@
 
-- 
cgit v1.2.3


From 1e12d3ef47d228e4e7d30f9bc5e6744ede90319c Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 25 Jan 2023 10:47:32 -0600
Subject: bpf: Allow BPF_PROG_TYPE_STRUCT_OPS programs to be sleepable

BPF struct_ops programs currently cannot be marked as sleepable. This
need not be the case -- struct_ops programs can be sleepable, and e.g.
invoke kfuncs that export the KF_SLEEPABLE flag. So as to allow future
struct_ops programs to invoke such kfuncs, this patch updates the
verifier to allow struct_ops programs to be sleepable. A follow-on patch
will add support to libbpf for specifying struct_ops.s as a sleepable
struct_ops program, and then another patch will add testcases to the
dummy_st_ops selftest suite which test sleepable struct_ops behavior.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230125164735.785732-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                            | 5 +++--
 tools/testing/selftests/bpf/verifier/sleepable.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb38b01b738f..c8907df49f81 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17114,7 +17114,8 @@ static bool can_be_sleepable(struct bpf_prog *prog)
 		}
 	}
 	return prog->type == BPF_PROG_TYPE_LSM ||
-	       prog->type == BPF_PROG_TYPE_KPROBE; /* only for uprobes */
+	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
+	       prog->type == BPF_PROG_TYPE_STRUCT_OPS;
 }
 
 static int check_attach_btf_id(struct bpf_verifier_env *env)
@@ -17136,7 +17137,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	}
 
 	if (prog->aux->sleepable && !can_be_sleepable(prog)) {
-		verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter and uprobe programs can be sleepable\n");
+		verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
 		return -EINVAL;
 	}
 
diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c
index bea0daef908a..1f0d2bdc673f 100644
--- a/tools/testing/selftests/bpf/verifier/sleepable.c
+++ b/tools/testing/selftests/bpf/verifier/sleepable.c
@@ -85,7 +85,7 @@
 	.expected_attach_type = BPF_TRACE_RAW_TP,
 	.kfunc = "sched_switch",
 	.result = REJECT,
-	.errstr = "Only fentry/fexit/fmod_ret, lsm, iter and uprobe programs can be sleepable",
+	.errstr = "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable",
 	.flags = BPF_F_SLEEPABLE,
 	.runs = -1,
 },
-- 
cgit v1.2.3


From 7dd880592a88799f3ef48fda507849a75f11cbf0 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 25 Jan 2023 10:47:35 -0600
Subject: bpf/selftests: Verify struct_ops prog sleepable behavior

In a set of prior changes, we added the ability for struct_ops programs
to be sleepable. This patch enhances the dummy_st_ops selftest suite to
validate this behavior by adding a new sleepable struct_ops entry to
dummy_st_ops.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230125164735.785732-5-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h                                |  1 +
 net/bpf/bpf_dummy_struct_ops.c                     | 18 ++++++++
 .../selftests/bpf/prog_tests/dummy_st_ops.c        | 52 ++++++++++++++++------
 tools/testing/selftests/bpf/progs/dummy_st_ops.c   | 50 ---------------------
 .../selftests/bpf/progs/dummy_st_ops_fail.c        | 27 +++++++++++
 .../selftests/bpf/progs/dummy_st_ops_success.c     | 47 +++++++++++++++++++
 6 files changed, 132 insertions(+), 63 deletions(-)
 delete mode 100644 tools/testing/selftests/bpf/progs/dummy_st_ops.c
 create mode 100644 tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c
 create mode 100644 tools/testing/selftests/bpf/progs/dummy_st_ops_success.c

(limited to 'tools/testing')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0d868ef1b973..14a0264fac57 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1474,6 +1474,7 @@ struct bpf_dummy_ops {
 	int (*test_1)(struct bpf_dummy_ops_state *cb);
 	int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
 		      char a3, unsigned long a4);
+	int (*test_sleepable)(struct bpf_dummy_ops_state *cb);
 };
 
 int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 1ac4467928a9..ff4f89a2b02a 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -154,6 +154,23 @@ static bool bpf_dummy_ops_is_valid_access(int off, int size,
 	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
 }
 
+static int bpf_dummy_ops_check_member(const struct btf_type *t,
+				      const struct btf_member *member,
+				      const struct bpf_prog *prog)
+{
+	u32 moff = __btf_member_bit_offset(t, member) / 8;
+
+	switch (moff) {
+	case offsetof(struct bpf_dummy_ops, test_sleepable):
+		break;
+	default:
+		if (prog->aux->sleepable)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
 					   const struct bpf_reg_state *reg,
 					   int off, int size, enum bpf_access_type atype,
@@ -208,6 +225,7 @@ static void bpf_dummy_unreg(void *kdata)
 struct bpf_struct_ops bpf_bpf_dummy_ops = {
 	.verifier_ops = &bpf_dummy_verifier_ops,
 	.init = bpf_dummy_init,
+	.check_member = bpf_dummy_ops_check_member,
 	.init_member = bpf_dummy_init_member,
 	.reg = bpf_dummy_reg,
 	.unreg = bpf_dummy_unreg,
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
index c11832657d2b..f43fcb13d2c4 100644
--- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (C) 2021. Huawei Technologies Co., Ltd */
 #include <test_progs.h>
-#include "dummy_st_ops.skel.h"
+#include "dummy_st_ops_success.skel.h"
+#include "dummy_st_ops_fail.skel.h"
 #include "trace_dummy_st_ops.skel.h"
 
 /* Need to keep consistent with definition in include/linux/bpf.h */
@@ -11,17 +12,17 @@ struct bpf_dummy_ops_state {
 
 static void test_dummy_st_ops_attach(void)
 {
-	struct dummy_st_ops *skel;
+	struct dummy_st_ops_success *skel;
 	struct bpf_link *link;
 
-	skel = dummy_st_ops__open_and_load();
+	skel = dummy_st_ops_success__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
 		return;
 
 	link = bpf_map__attach_struct_ops(skel->maps.dummy_1);
 	ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach");
 
-	dummy_st_ops__destroy(skel);
+	dummy_st_ops_success__destroy(skel);
 }
 
 static void test_dummy_init_ret_value(void)
@@ -31,10 +32,10 @@ static void test_dummy_init_ret_value(void)
 		.ctx_in = args,
 		.ctx_size_in = sizeof(args),
 	);
-	struct dummy_st_ops *skel;
+	struct dummy_st_ops_success *skel;
 	int fd, err;
 
-	skel = dummy_st_ops__open_and_load();
+	skel = dummy_st_ops_success__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
 		return;
 
@@ -43,7 +44,7 @@ static void test_dummy_init_ret_value(void)
 	ASSERT_OK(err, "test_run");
 	ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret");
 
-	dummy_st_ops__destroy(skel);
+	dummy_st_ops_success__destroy(skel);
 }
 
 static void test_dummy_init_ptr_arg(void)
@@ -58,10 +59,10 @@ static void test_dummy_init_ptr_arg(void)
 		.ctx_size_in = sizeof(args),
 	);
 	struct trace_dummy_st_ops *trace_skel;
-	struct dummy_st_ops *skel;
+	struct dummy_st_ops_success *skel;
 	int fd, err;
 
-	skel = dummy_st_ops__open_and_load();
+	skel = dummy_st_ops_success__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
 		return;
 
@@ -91,7 +92,7 @@ static void test_dummy_init_ptr_arg(void)
 	ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val");
 
 done:
-	dummy_st_ops__destroy(skel);
+	dummy_st_ops_success__destroy(skel);
 	trace_dummy_st_ops__destroy(trace_skel);
 }
 
@@ -102,12 +103,12 @@ static void test_dummy_multiple_args(void)
 		.ctx_in = args,
 		.ctx_size_in = sizeof(args),
 	);
-	struct dummy_st_ops *skel;
+	struct dummy_st_ops_success *skel;
 	int fd, err;
 	size_t i;
 	char name[8];
 
-	skel = dummy_st_ops__open_and_load();
+	skel = dummy_st_ops_success__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
 		return;
 
@@ -119,7 +120,28 @@ static void test_dummy_multiple_args(void)
 		ASSERT_EQ(skel->bss->test_2_args[i], args[i], name);
 	}
 
-	dummy_st_ops__destroy(skel);
+	dummy_st_ops_success__destroy(skel);
+}
+
+static void test_dummy_sleepable(void)
+{
+	__u64 args[1] = {0};
+	LIBBPF_OPTS(bpf_test_run_opts, attr,
+		.ctx_in = args,
+		.ctx_size_in = sizeof(args),
+	);
+	struct dummy_st_ops_success *skel;
+	int fd, err;
+
+	skel = dummy_st_ops_success__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+		return;
+
+	fd = bpf_program__fd(skel->progs.test_sleepable);
+	err = bpf_prog_test_run_opts(fd, &attr);
+	ASSERT_OK(err, "test_run");
+
+	dummy_st_ops_success__destroy(skel);
 }
 
 void test_dummy_st_ops(void)
@@ -132,4 +154,8 @@ void test_dummy_st_ops(void)
 		test_dummy_init_ptr_arg();
 	if (test__start_subtest("dummy_multiple_args"))
 		test_dummy_multiple_args();
+	if (test__start_subtest("dummy_sleepable"))
+		test_dummy_sleepable();
+
+	RUN_TESTS(dummy_st_ops_fail);
 }
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops.c b/tools/testing/selftests/bpf/progs/dummy_st_ops.c
deleted file mode 100644
index ead87edb75e2..000000000000
--- a/tools/testing/selftests/bpf/progs/dummy_st_ops.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-struct bpf_dummy_ops_state {
-	int val;
-} __attribute__((preserve_access_index));
-
-struct bpf_dummy_ops {
-	int (*test_1)(struct bpf_dummy_ops_state *state);
-	int (*test_2)(struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
-		      char a3, unsigned long a4);
-};
-
-char _license[] SEC("license") = "GPL";
-
-SEC("struct_ops/test_1")
-int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
-{
-	int ret;
-
-	if (!state)
-		return 0xf2f3f4f5;
-
-	ret = state->val;
-	state->val = 0x5a;
-	return ret;
-}
-
-__u64 test_2_args[5];
-
-SEC("struct_ops/test_2")
-int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
-	     char a3, unsigned long a4)
-{
-	test_2_args[0] = (unsigned long)state;
-	test_2_args[1] = a1;
-	test_2_args[2] = a2;
-	test_2_args[3] = a3;
-	test_2_args[4] = a4;
-	return 0;
-}
-
-SEC(".struct_ops")
-struct bpf_dummy_ops dummy_1 = {
-	.test_1 = (void *)test_1,
-	.test_2 = (void *)test_2,
-};
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c
new file mode 100644
index 000000000000..0bf969a0b5ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops.s/test_2")
+__failure __msg("attach to unsupported member test_2 of struct bpf_dummy_ops")
+int BPF_PROG(test_unsupported_field_sleepable,
+	     struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+	     char a3, unsigned long a4)
+{
+	/* Tries to mark an unsleepable field in struct bpf_dummy_ops as sleepable. */
+	return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+	.test_1 = NULL,
+	.test_2 = (void *)test_unsupported_field_sleepable,
+	.test_sleepable = (void *)NULL,
+};
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
new file mode 100644
index 000000000000..1efa746c25dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
+{
+	int ret;
+
+	if (!state)
+		return 0xf2f3f4f5;
+
+	ret = state->val;
+	state->val = 0x5a;
+	return ret;
+}
+
+__u64 test_2_args[5];
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+	     char a3, unsigned long a4)
+{
+	test_2_args[0] = (unsigned long)state;
+	test_2_args[1] = a1;
+	test_2_args[2] = a2;
+	test_2_args[3] = a3;
+	test_2_args[4] = a4;
+	return 0;
+}
+
+SEC("struct_ops.s/test_sleepable")
+int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
+{
+	return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+	.test_1 = (void *)test_1,
+	.test_2 = (void *)test_2,
+	.test_sleepable = (void *)test_sleepable,
+};
-- 
cgit v1.2.3


From d1246f93602316e2dda1000f185e8d13dd611871 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@meta.com>
Date: Wed, 25 Jan 2023 12:16:08 -0800
Subject: selftests/bpf: Calls bpf_setsockopt() on a ktls enabled socket.

Ensures that whenever bpf_setsockopt() is called with the SOL_TCP
option on a ktls enabled socket, the call will be accepted by the
system. The provided test makes sure of this by performing an
examination when the server side socket is in the CLOSE_WAIT state. At
this stage, ktls is still enabled on the server socket and can be used
to test if bpf_setsockopt() works correctly with linux.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
Link: https://lore.kernel.org/r/20230125201608.908230-3-kuifeng@meta.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/prog_tests/setget_sockopt.c      | 73 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/setget_sockopt.c |  8 +++
 2 files changed, 81 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index 018611e6b248..7d4a9b3d3722 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -4,6 +4,7 @@
 #define _GNU_SOURCE
 #include <sched.h>
 #include <linux/socket.h>
+#include <linux/tls.h>
 #include <net/if.h>
 
 #include "test_progs.h"
@@ -83,6 +84,76 @@ static void test_udp(int family)
 	ASSERT_EQ(bss->nr_binddev, 1, "nr_bind");
 }
 
+static void test_ktls(int family)
+{
+	struct tls12_crypto_info_aes_gcm_128 aes128;
+	struct setget_sockopt__bss *bss = skel->bss;
+	int cfd = -1, sfd = -1, fd = -1, ret;
+	char buf;
+
+	memset(bss, 0, sizeof(*bss));
+
+	sfd = start_server(family, SOCK_STREAM,
+			   family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+	if (!ASSERT_GE(sfd, 0, "start_server"))
+		return;
+	fd = connect_to_fd(sfd, 0);
+	if (!ASSERT_GE(fd, 0, "connect_to_fd"))
+		goto err_out;
+
+	cfd = accept(sfd, NULL, 0);
+	if (!ASSERT_GE(cfd, 0, "accept"))
+		goto err_out;
+
+	close(sfd);
+	sfd = -1;
+
+	/* Setup KTLS */
+	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (!ASSERT_OK(ret, "setsockopt"))
+		goto err_out;
+	ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (!ASSERT_OK(ret, "setsockopt"))
+		goto err_out;
+
+	memset(&aes128, 0, sizeof(aes128));
+	aes128.info.version = TLS_1_2_VERSION;
+	aes128.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+	ret = setsockopt(fd, SOL_TLS, TLS_TX, &aes128, sizeof(aes128));
+	if (!ASSERT_OK(ret, "setsockopt"))
+		goto err_out;
+
+	ret = setsockopt(cfd, SOL_TLS, TLS_RX, &aes128, sizeof(aes128));
+	if (!ASSERT_OK(ret, "setsockopt"))
+		goto err_out;
+
+	/* KTLS is enabled */
+
+	close(fd);
+	/* At this point, the cfd socket is at the CLOSE_WAIT state
+	 * and still run TLS protocol.  The test for
+	 * BPF_TCP_CLOSE_WAIT should be run at this point.
+	 */
+	ret = read(cfd, &buf, sizeof(buf));
+	ASSERT_EQ(ret, 0, "read");
+	close(cfd);
+
+	ASSERT_EQ(bss->nr_listen, 1, "nr_listen");
+	ASSERT_EQ(bss->nr_connect, 1, "nr_connect");
+	ASSERT_EQ(bss->nr_active, 1, "nr_active");
+	ASSERT_EQ(bss->nr_passive, 1, "nr_passive");
+	ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create");
+	ASSERT_EQ(bss->nr_binddev, 2, "nr_bind");
+	ASSERT_EQ(bss->nr_fin_wait1, 1, "nr_fin_wait1");
+	return;
+
+err_out:
+	close(fd);
+	close(cfd);
+	close(sfd);
+}
+
 void test_setget_sockopt(void)
 {
 	cg_fd = test__join_cgroup(CG_NAME);
@@ -118,6 +189,8 @@ void test_setget_sockopt(void)
 	test_tcp(AF_INET);
 	test_udp(AF_INET6);
 	test_udp(AF_INET);
+	test_ktls(AF_INET6);
+	test_ktls(AF_INET);
 
 done:
 	setget_sockopt__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 9523333b8905..7a438600ae98 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -22,6 +22,7 @@ int nr_active;
 int nr_connect;
 int nr_binddev;
 int nr_socket_post_create;
+int nr_fin_wait1;
 
 struct sockopt_test {
 	int opt;
@@ -386,6 +387,13 @@ int skops_sockopt(struct bpf_sock_ops *skops)
 		nr_passive += !(bpf_test_sockopt(skops, sk) ||
 				test_tcp_maxseg(skops, sk) ||
 				test_tcp_saved_syn(skops, sk));
+		bpf_sock_ops_cb_flags_set(skops,
+					  skops->bpf_sock_ops_cb_flags |
+					  BPF_SOCK_OPS_STATE_CB_FLAG);
+		break;
+	case BPF_SOCK_OPS_STATE_CB:
+		if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
+			nr_fin_wait1 += !bpf_test_sockopt(skops, sk);
 		break;
 	}
 
-- 
cgit v1.2.3


From ae5439658ccec3dcef1754dcd2ea3d3529b3508d Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Tue, 24 Jan 2023 14:36:44 +0100
Subject: selftests/net: Cover the IP_LOCAL_PORT_RANGE socket option

Exercise IP_LOCAL_PORT_RANGE socket option in various scenarios:

1. pass invalid values to setsockopt
2. pass a range outside of the per-netns port range
3. configure a single-port range
4. exhaust a configured multi-port range
5. check interaction with late-bind (IP_BIND_ADDRESS_NO_PORT)
6. set then get the per-socket port range

Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile               |   2 +
 tools/testing/selftests/net/ip_local_port_range.c  | 447 +++++++++++++++++++++
 tools/testing/selftests/net/ip_local_port_range.sh |   5 +
 3 files changed, 454 insertions(+)
 create mode 100644 tools/testing/selftests/net/ip_local_port_range.c
 create mode 100755 tools/testing/selftests/net/ip_local_port_range.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 47314f0b3006..951bd5342bc6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -45,6 +45,7 @@ TEST_PROGS += arp_ndisc_untracked_subnets.sh
 TEST_PROGS += stress_reuseport_listen.sh
 TEST_PROGS += l2_tos_ttl_inherit.sh
 TEST_PROGS += bind_bhash.sh
+TEST_PROGS += ip_local_port_range.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
@@ -76,6 +77,7 @@ TEST_PROGS += sctp_vrf.sh
 TEST_GEN_FILES += sctp_hello
 TEST_GEN_FILES += csum
 TEST_GEN_FILES += nat6to4.o
+TEST_GEN_FILES += ip_local_port_range
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
new file mode 100644
index 000000000000..75e3fdacdf73
--- /dev/null
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2023 Cloudflare
+
+/* Test IP_LOCAL_PORT_RANGE socket option: IPv4 + IPv6, TCP + UDP.
+ *
+ * Tests assume that net.ipv4.ip_local_port_range is [40000, 49999].
+ * Don't run these directly but with ip_local_port_range.sh script.
+ */
+
+#include <fcntl.h>
+#include <netinet/ip.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef IP_LOCAL_PORT_RANGE
+#define IP_LOCAL_PORT_RANGE 51
+#endif
+
+static __u32 pack_port_range(__u16 lo, __u16 hi)
+{
+	return (hi << 16) | (lo << 0);
+}
+
+static void unpack_port_range(__u32 range, __u16 *lo, __u16 *hi)
+{
+	*lo = range & 0xffff;
+	*hi = range >> 16;
+}
+
+static int get_so_domain(int fd)
+{
+	int domain, err;
+	socklen_t len;
+
+	len = sizeof(domain);
+	err = getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len);
+	if (err)
+		return -1;
+
+	return domain;
+}
+
+static int bind_to_loopback_any_port(int fd)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in v4;
+		struct sockaddr_in6 v6;
+	} addr;
+	socklen_t addr_len;
+
+	memset(&addr, 0, sizeof(addr));
+	switch (get_so_domain(fd)) {
+	case AF_INET:
+		addr.v4.sin_family = AF_INET;
+		addr.v4.sin_port = htons(0);
+		addr.v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr_len = sizeof(addr.v4);
+		break;
+	case AF_INET6:
+		addr.v6.sin6_family = AF_INET6;
+		addr.v6.sin6_port = htons(0);
+		addr.v6.sin6_addr = in6addr_loopback;
+		addr_len = sizeof(addr.v6);
+		break;
+	default:
+		return -1;
+	}
+
+	return bind(fd, &addr.sa, addr_len);
+}
+
+static int get_sock_port(int fd)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in v4;
+		struct sockaddr_in6 v6;
+	} addr;
+	socklen_t addr_len;
+	int err;
+
+	addr_len = sizeof(addr);
+	memset(&addr, 0, sizeof(addr));
+	err = getsockname(fd, &addr.sa, &addr_len);
+	if (err)
+		return -1;
+
+	switch (addr.sa.sa_family) {
+	case AF_INET:
+		return ntohs(addr.v4.sin_port);
+	case AF_INET6:
+		return ntohs(addr.v6.sin6_port);
+	default:
+		errno = EAFNOSUPPORT;
+		return -1;
+	}
+}
+
+static int get_ip_local_port_range(int fd, __u32 *range)
+{
+	socklen_t len;
+	__u32 val;
+	int err;
+
+	len = sizeof(val);
+	err = getsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val, &len);
+	if (err)
+		return -1;
+
+	*range = val;
+	return 0;
+}
+
+FIXTURE(ip_local_port_range) {};
+
+FIXTURE_SETUP(ip_local_port_range)
+{
+}
+
+FIXTURE_TEARDOWN(ip_local_port_range)
+{
+}
+
+FIXTURE_VARIANT(ip_local_port_range) {
+	int so_domain;
+	int so_type;
+	int so_protocol;
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_tcp) {
+	.so_domain	= AF_INET,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_udp) {
+	.so_domain	= AF_INET,
+	.so_type	= SOCK_DGRAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) {
+	.so_domain	= AF_INET,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= IPPROTO_SCTP,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) {
+	.so_domain	= AF_INET6,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_udp) {
+	.so_domain	= AF_INET6,
+	.so_type	= SOCK_DGRAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) {
+	.so_domain	= AF_INET6,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= IPPROTO_SCTP,
+};
+
+TEST_F(ip_local_port_range, invalid_option_value)
+{
+	__u16 val16;
+	__u32 val32;
+	__u64 val64;
+	int fd, err;
+
+	fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	/* Too few bytes */
+	val16 = 40000;
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val16, sizeof(val16));
+	EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+	EXPECT_EQ(errno, EINVAL);
+
+	/* Empty range: low port > high port */
+	val32 = pack_port_range(40222, 40111);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val32, sizeof(val32));
+	EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+	EXPECT_EQ(errno, EINVAL);
+
+	/* Too many bytes */
+	val64 = pack_port_range(40333, 40444);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val64, sizeof(val64));
+	EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+	EXPECT_EQ(errno, EINVAL);
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+TEST_F(ip_local_port_range, port_range_out_of_netns_range)
+{
+	const struct test {
+		__u16 range_lo;
+		__u16 range_hi;
+	} tests[] = {
+		{ 30000, 39999 }, /* socket range below netns range */
+		{ 50000, 59999 }, /* socket range above netns range */
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		/* Bind a couple of sockets, not just one, to check
+		 * that the range wasn't clamped to a single port from
+		 * the netns range. That is [40000, 40000] or [49999,
+		 * 49999], respectively for each test case.
+		 */
+		int fds[2], i;
+
+		TH_LOG("lo %5hu, hi %5hu", t->range_lo, t->range_hi);
+
+		for (i = 0; i < ARRAY_SIZE(fds); i++) {
+			int fd, err, port;
+			__u32 range;
+
+			fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+			ASSERT_GE(fd, 0) TH_LOG("#%d: socket failed", i);
+
+			range = pack_port_range(t->range_lo, t->range_hi);
+			err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+			ASSERT_TRUE(!err) TH_LOG("#%d: setsockopt(IP_LOCAL_PORT_RANGE) failed", i);
+
+			err = bind_to_loopback_any_port(fd);
+			ASSERT_TRUE(!err) TH_LOG("#%d: bind failed", i);
+
+			/* Check that socket port range outside of ephemeral range is ignored */
+			port = get_sock_port(fd);
+			ASSERT_GE(port, 40000) TH_LOG("#%d: expected port within netns range", i);
+			ASSERT_LE(port, 49999) TH_LOG("#%d: expected port within netns range", i);
+
+			fds[i] = fd;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(fds); i++)
+			ASSERT_TRUE(close(fds[i]) == 0) TH_LOG("#%d: close failed", i);
+	}
+}
+
+TEST_F(ip_local_port_range, single_port_range)
+{
+	const struct test {
+		__u16 range_lo;
+		__u16 range_hi;
+		__u16 expected;
+	} tests[] = {
+		/* single port range within ephemeral range */
+		{ 45000, 45000, 45000 },
+		/* first port in the ephemeral range (clamp from above) */
+		{ 0, 40000, 40000 },
+		/* last port in the ephemeral range (clamp from below)  */
+		{ 49999, 0, 49999 },
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		int fd, err, port;
+		__u32 range;
+
+		TH_LOG("lo %5hu, hi %5hu, expected %5hu",
+		       t->range_lo, t->range_hi, t->expected);
+
+		fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+		ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+		range = pack_port_range(t->range_lo, t->range_hi);
+		err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+		ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+		err = bind_to_loopback_any_port(fd);
+		ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+		port = get_sock_port(fd);
+		ASSERT_EQ(port, t->expected) TH_LOG("unexpected local port");
+
+		err = close(fd);
+		ASSERT_TRUE(!err) TH_LOG("close failed");
+	}
+}
+
+TEST_F(ip_local_port_range, exhaust_8_port_range)
+{
+	__u8 port_set = 0;
+	int i, fd, err;
+	__u32 range;
+	__u16 port;
+	int fds[8];
+
+	for (i = 0; i < ARRAY_SIZE(fds); i++) {
+		fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+		ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+		range = pack_port_range(40000, 40007);
+		err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+		ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+		err = bind_to_loopback_any_port(fd);
+		ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+		port = get_sock_port(fd);
+		ASSERT_GE(port, 40000) TH_LOG("expected port within sockopt range");
+		ASSERT_LE(port, 40007) TH_LOG("expected port within sockopt range");
+
+		port_set |= 1 << (port - 40000);
+		fds[i] = fd;
+	}
+
+	/* Check that all every port from the test range is in use */
+	ASSERT_EQ(port_set, 0xff) TH_LOG("expected all ports to be busy");
+
+	/* Check that bind() fails because the whole range is busy */
+	fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	range = pack_port_range(40000, 40007);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	err = bind_to_loopback_any_port(fd);
+	ASSERT_TRUE(err) TH_LOG("expected bind to fail");
+	ASSERT_EQ(errno, EADDRINUSE);
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+
+	for (i = 0; i < ARRAY_SIZE(fds); i++) {
+		err = close(fds[i]);
+		ASSERT_TRUE(!err) TH_LOG("close failed");
+	}
+}
+
+TEST_F(ip_local_port_range, late_bind)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in v4;
+		struct sockaddr_in6 v6;
+	} addr;
+	socklen_t addr_len;
+	const int one = 1;
+	int fd, err;
+	__u32 range;
+	__u16 port;
+
+	if (variant->so_protocol == IPPROTO_SCTP)
+		SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT");
+
+	fd = socket(variant->so_domain, variant->so_type, 0);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	range = pack_port_range(40100, 40199);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	err = setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_BIND_ADDRESS_NO_PORT) failed");
+
+	err = bind_to_loopback_any_port(fd);
+	ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+	port = get_sock_port(fd);
+	ASSERT_EQ(port, 0) TH_LOG("getsockname failed");
+
+	/* Invalid destination */
+	memset(&addr, 0, sizeof(addr));
+	switch (variant->so_domain) {
+	case AF_INET:
+		addr.v4.sin_family = AF_INET;
+		addr.v4.sin_port = htons(0);
+		addr.v4.sin_addr.s_addr = htonl(INADDR_ANY);
+		addr_len = sizeof(addr.v4);
+		break;
+	case AF_INET6:
+		addr.v6.sin6_family = AF_INET6;
+		addr.v6.sin6_port = htons(0);
+		addr.v6.sin6_addr = in6addr_any;
+		addr_len = sizeof(addr.v6);
+		break;
+	default:
+		ASSERT_TRUE(false) TH_LOG("unsupported socket domain");
+	}
+
+	/* connect() doesn't need to succeed for late bind to happen */
+	connect(fd, &addr.sa, addr_len);
+
+	port = get_sock_port(fd);
+	ASSERT_GE(port, 40100);
+	ASSERT_LE(port, 40199);
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+TEST_F(ip_local_port_range, get_port_range)
+{
+	__u16 lo, hi;
+	__u32 range;
+	int fd, err;
+
+	fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	/* Get range before it will be set */
+	err = get_ip_local_port_range(fd, &range);
+	ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	unpack_port_range(range, &lo, &hi);
+	ASSERT_EQ(lo, 0) TH_LOG("unexpected low port");
+	ASSERT_EQ(hi, 0) TH_LOG("unexpected high port");
+
+	range = pack_port_range(12345, 54321);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	/* Get range after it has been set */
+	err = get_ip_local_port_range(fd, &range);
+	ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	unpack_port_range(range, &lo, &hi);
+	ASSERT_EQ(lo, 12345) TH_LOG("unexpected low port");
+	ASSERT_EQ(hi, 54321) TH_LOG("unexpected high port");
+
+	/* Unset the port range  */
+	range = pack_port_range(0, 0);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	/* Get range after it has been unset */
+	err = get_ip_local_port_range(fd, &range);
+	ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	unpack_port_range(range, &lo, &hi);
+	ASSERT_EQ(lo, 0) TH_LOG("unexpected low port");
+	ASSERT_EQ(hi, 0) TH_LOG("unexpected high port");
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh
new file mode 100755
index 000000000000..6c6ad346eaa0
--- /dev/null
+++ b/tools/testing/selftests/net/ip_local_port_range.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range'
-- 
cgit v1.2.3


From ad3493746ebe9e9adf3d8c5b003fa0240f25242a Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 25 Jan 2023 11:47:23 +0100
Subject: selftests: mptcp: add test-cases for mixed v4/v6 subflows

Note that we can't guess the listener family anymore based on the client
target address: always use IPv6.

The fullmesh flag with endpoints from different families is also
validated here.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 53 ++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index d11d3d566608..387abdcec011 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -774,24 +774,17 @@ do_transfer()
 		addr_nr_ns2=${addr_nr_ns2:9}
 	fi
 
-	local local_addr
-	if is_v6 "${connect_addr}"; then
-		local_addr="::"
-	else
-		local_addr="0.0.0.0"
-	fi
-
 	extra_srv_args="$extra_args $extra_srv_args"
 	if [ "$test_link_fail" -gt 1 ];then
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
 				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_srv_args ${local_addr} < "$sinfail" > "$sout" &
+					$extra_srv_args "::" < "$sinfail" > "$sout" &
 	else
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
 				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_srv_args ${local_addr} < "$sin" > "$sout" &
+					$extra_srv_args "::" < "$sin" > "$sout" &
 	fi
 	local spid=$!
 
@@ -2448,6 +2441,47 @@ v4mapped_tests()
 	fi
 }
 
+mixed_tests()
+{
+	if reset "IPv4 sockets do not use IPv6 addresses"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
+		chk_join_nr 0 0 0
+	fi
+
+	# Need an IPv6 mptcp socket to allow subflows of both families
+	if reset "simult IPv4 and IPv6 subflows"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+		run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
+		chk_join_nr 1 1 1
+	fi
+
+	# cross families subflows will not be created even in fullmesh mode
+	if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then
+		pm_nl_set_limits $ns1 0 4
+		pm_nl_set_limits $ns2 1 4
+		pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+		run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow
+		chk_join_nr 1 1 1
+	fi
+
+	# fullmesh still tries to create all the possibly subflows with
+	# matching family
+	if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then
+		pm_nl_set_limits $ns1 0 4
+		pm_nl_set_limits $ns2 2 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow
+		chk_join_nr 4 4 4
+	fi
+}
+
 backup_tests()
 {
 	# single subflow, backup
@@ -3120,6 +3154,7 @@ all_tests_sorted=(
 	a@add_tests
 	6@ipv6_tests
 	4@v4mapped_tests
+	M@mixed_tests
 	b@backup_tests
 	p@add_addr_ports_tests
 	k@syncookies_tests
-- 
cgit v1.2.3


From f790ae03db33e6a3efd18fe7aefe3c7897195cc2 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 25 Jan 2023 11:47:25 +0100
Subject: selftests: mptcp: userspace: print titles

This script is running a few tests after having setup the environment.

Printing titles helps understand what is being tested.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/mptcp/userspace_pm.sh | 24 ++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index ab2d581f28a1..7b06d9d0aa46 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -43,6 +43,11 @@ rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
 ns1="ns1-$rndh"
 ns2="ns2-$rndh"
 
+print_title()
+{
+	stdbuf -o0 -e0 printf "INFO: %s\n" "${1}"
+}
+
 kill_wait()
 {
 	kill $1 > /dev/null 2>&1
@@ -51,7 +56,7 @@ kill_wait()
 
 cleanup()
 {
-	echo "cleanup"
+	print_title "Cleanup"
 
 	rm -rf $file $client_evts $server_evts
 
@@ -78,6 +83,8 @@ cleanup()
 	for netns in "$ns1" "$ns2" ;do
 		ip netns del "$netns"
 	done
+
+	stdbuf -o0 -e0 printf "Done\n"
 }
 
 trap cleanup EXIT
@@ -108,6 +115,7 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
 ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
 ip -net "$ns2" link set ns2eth1 up
 
+print_title "Init"
 stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2         \t\t\t[OK]\n"
 
 make_file()
@@ -255,6 +263,8 @@ verify_announce_event()
 
 test_announce()
 {
+	print_title "Announce tests"
+
 	# Capture events on the network namespace running the server
 	:>"$server_evts"
 
@@ -359,6 +369,8 @@ verify_remove_event()
 
 test_remove()
 {
+	print_title "Remove tests"
+
 	# Capture events on the network namespace running the server
 	:>"$server_evts"
 
@@ -521,6 +533,8 @@ verify_subflow_events()
 
 test_subflows()
 {
+	print_title "Subflows v4 or v6 only tests"
+
 	# Capture events on the network namespace running the server
 	:>"$server_evts"
 
@@ -754,6 +768,8 @@ test_subflows()
 
 test_subflows_v4_v6_mix()
 {
+	print_title "Subflows v4 and v6 mix tests"
+
 	# Attempt to add a listener at 10.0.2.1:<subflow-port>
 	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
 	   $app6_port > /dev/null 2>&1 &
@@ -800,6 +816,8 @@ test_subflows_v4_v6_mix()
 
 test_prio()
 {
+	print_title "Prio tests"
+
 	local count
 
 	# Send MP_PRIO signal from client to server machine
@@ -876,6 +894,8 @@ verify_listener_events()
 
 test_listener()
 {
+	print_title "Listener tests"
+
 	# Capture events on the network namespace running the client
 	:>$client_evts
 
@@ -902,8 +922,10 @@ test_listener()
 	verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
 }
 
+print_title "Make connections"
 make_connection
 make_connection "v6"
+
 test_announce
 test_remove
 test_subflows
-- 
cgit v1.2.3


From 1c0b0ee2640bbbf225c9bdf2f7b121fcf4ee2059 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 25 Jan 2023 11:47:26 +0100
Subject: selftests: mptcp: userspace: refactor asserts

Instead of having a long list of conditions to check, it is possible to
give a list of variable names to compare with their 'e_XXX' version.

This will ease the introduction of the following commit which will print
which condition has failed (if any).

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/mptcp/userspace_pm.sh | 72 +++++++++++------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 7b06d9d0aa46..2f2a85a212b0 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -225,6 +225,36 @@ make_connection()
 	fi
 }
 
+# $1: var name
+check_expected_one()
+{
+	local var="${1}"
+	local exp="e_${var}"
+
+	[ "${!var}" = "${!exp}" ]
+}
+
+# $@: all var names to check
+check_expected()
+{
+	local ret=0
+	local var
+
+	for var in "${@}"
+	do
+		check_expected_one "${var}" || ret=1
+	done
+
+	if [ ${ret} -eq 0 ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+		return 0
+	fi
+
+	stdbuf -o0 -e0 printf "[FAIL]\n"
+	exit 1
+}
+
 verify_announce_event()
 {
 	local evt=$1
@@ -250,15 +280,8 @@ verify_announce_event()
 	fi
 	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
 	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
-		   [ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] &&
-		   [ "$id" = "$e_id" ]
-	then
-		stdbuf -o0 -e0 printf "[OK]\n"
-		return 0
-	fi
-	stdbuf -o0 -e0 printf "[FAIL]\n"
-	exit 1
+
+	check_expected "type" "token" "addr" "dport" "id"
 }
 
 test_announce()
@@ -357,14 +380,8 @@ verify_remove_event()
 	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
 	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
 	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
-		   [ "$id" = "$e_id" ]
-	then
-		stdbuf -o0 -e0 printf "[OK]\n"
-		return 0
-	fi
-	stdbuf -o0 -e0 printf "[FAIL]\n"
-	exit 1
+
+	check_expected "type" "token" "id"
 }
 
 test_remove()
@@ -519,16 +536,7 @@ verify_subflow_events()
 		daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
 	fi
 
-	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
-		   [ "$daddr" = "$e_daddr" ] && [ "$e_dport" = "$dport" ] &&
-		   [ "$family" = "$e_family" ] && [ "$saddr" = "$e_saddr" ] &&
-		   [ "$e_locid" = "$locid" ] && [ "$e_remid" = "$remid" ]
-	then
-		stdbuf -o0 -e0 printf "[OK]\n"
-		return 0
-	fi
-	stdbuf -o0 -e0 printf "[FAIL]\n"
-	exit 1
+	check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
 }
 
 test_subflows()
@@ -881,15 +889,7 @@ verify_listener_events()
 			sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
 	fi
 
-	if [ $type ] && [ $type = $e_type ] &&
-	   [ $family ] && [ $family = $e_family ] &&
-	   [ $saddr ] && [ $saddr = $e_saddr ] &&
-	   [ $sport ] && [ $sport = $e_sport ]; then
-		stdbuf -o0 -e0 printf "[OK]\n"
-		return 0
-	fi
-	stdbuf -o0 -e0 printf "[FAIL]\n"
-	exit 1
+	check_expected "type" "family" "saddr" "sport"
 }
 
 test_listener()
-- 
cgit v1.2.3


From 10d4273411be6ead4d4d8a50a0593da15154f43f Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 25 Jan 2023 11:47:27 +0100
Subject: selftests: mptcp: userspace: print error details if any

Before, only '[FAIL]' was printed in case of error during the validation
phase.

Now, in case of failure, the variable name, its value and expected one
are displayed to help understand what was wrong.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/mptcp/userspace_pm.sh | 33 +++++++++++++++++------
 1 file changed, 25 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 2f2a85a212b0..259382ad552c 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -201,11 +201,16 @@ make_connection()
 	server_serverside=$(grep "type:1," "$server_evts" |
 			    sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q')
 
+	stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1    \t\t" $is_v6
 	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
 		   [ "$server_serverside" = 1 ]
 	then
-		stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1    \t\t[OK]\n" $is_v6
+		stdbuf -o0 -e0 printf "[OK]\n"
 	else
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+		stdbuf -o0 -e0 printf "\tExpected tokens (c:%s - s:%s) and server (c:%d - s:%d)\n" \
+			"${client_token}" "${server_token}" \
+			"${client_serverside}" "${server_serverside}"
 		exit 1
 	fi
 
@@ -225,13 +230,26 @@ make_connection()
 	fi
 }
 
-# $1: var name
+# $1: var name ; $2: prev ret
 check_expected_one()
 {
 	local var="${1}"
 	local exp="e_${var}"
+	local prev_ret="${2}"
 
-	[ "${!var}" = "${!exp}" ]
+	if [ "${!var}" = "${!exp}" ]
+	then
+		return 0
+	fi
+
+	if [ "${prev_ret}" = "0" ]
+	then
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+	fi
+
+	stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \
+		"${var}" "${!var}" "${!exp}"
+	return 1
 }
 
 # $@: all var names to check
@@ -242,7 +260,7 @@ check_expected()
 
 	for var in "${@}"
 	do
-		check_expected_one "${var}" || ret=1
+		check_expected_one "${var}" "${ret}" || ret=1
 	done
 
 	if [ ${ret} -eq 0 ]
@@ -251,7 +269,6 @@ check_expected()
 		return 0
 	fi
 
-	stdbuf -o0 -e0 printf "[FAIL]\n"
 	exit 1
 }
 
@@ -303,7 +320,7 @@ test_announce()
 	then
 		stdbuf -o0 -e0 printf "[OK]\n"
 	else
-		stdbuf -o0 -e0 printf "[FAIL]\n"
+		stdbuf -o0 -e0 printf "[FAIL]\n\ttype defined: %s\n" "${type}"
 		exit 1
 	fi
 
@@ -837,7 +854,7 @@ test_prio()
 	count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ $count != 1 ]; then
-		stdbuf -o0 -e0 printf "[FAIL]\n"
+		stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
 		exit 1
 	else
 		stdbuf -o0 -e0 printf "[OK]\n"
@@ -848,7 +865,7 @@ test_prio()
 	count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ $count != 1 ]; then
-		stdbuf -o0 -e0 printf "[FAIL]\n"
+		stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}"
 		exit 1
 	else
 		stdbuf -o0 -e0 printf "[OK]\n"
-- 
cgit v1.2.3


From 8dbdf24f4e9e060e4c4a3ad282a4a57f80f48b1f Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 25 Jan 2023 11:47:28 +0100
Subject: selftests: mptcp: userspace: avoid read errors

During the cleanup phase, the server pids were killed with a SIGTERM
directly, not using a SIGUSR1 first to quit safely. As a result, this
test was often ending with two error messages:

  read: Connection reset by peer

While at it, use a for-loop to terminate all the PIDs the same way.

Also the different files are now removed after having killed the PIDs
using them. It makes more sense to do that in this order.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/mptcp/userspace_pm.sh | 32 +++++++++--------------
 1 file changed, 12 insertions(+), 20 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 259382ad552c..66c5be25c13d 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -50,6 +50,9 @@ print_title()
 
 kill_wait()
 {
+	[ $1 -eq 0 ] && return 0
+
+	kill -SIGUSR1 $1 > /dev/null 2>&1
 	kill $1 > /dev/null 2>&1
 	wait $1 2>/dev/null
 }
@@ -58,32 +61,21 @@ cleanup()
 {
 	print_title "Cleanup"
 
-	rm -rf $file $client_evts $server_evts
-
 	# Terminate the MPTCP connection and related processes
-	if [ $client4_pid -ne 0 ]; then
-		kill -SIGUSR1 $client4_pid > /dev/null 2>&1
-	fi
-	if [ $server4_pid -ne 0 ]; then
-		kill_wait $server4_pid
-	fi
-	if [ $client6_pid -ne 0 ]; then
-		kill -SIGUSR1 $client6_pid > /dev/null 2>&1
-	fi
-	if [ $server6_pid -ne 0 ]; then
-		kill_wait $server6_pid
-	fi
-	if [ $server_evts_pid -ne 0 ]; then
-		kill_wait $server_evts_pid
-	fi
-	if [ $client_evts_pid -ne 0 ]; then
-		kill_wait $client_evts_pid
-	fi
+	local pid
+	for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
+		   $server_evts_pid $client_evts_pid
+	do
+		kill_wait $pid
+	done
+
 	local netns
 	for netns in "$ns1" "$ns2" ;do
 		ip netns del "$netns"
 	done
 
+	rm -rf $file $client_evts $server_evts
+
 	stdbuf -o0 -e0 printf "Done\n"
 }
 
-- 
cgit v1.2.3


From a5f3a3f7c17273f825e96d1b3369164a39345a35 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 26 Jan 2023 14:50:30 -0800
Subject: selftests/bpf: Properly enable hwtstamp in xdp_hw_metadata

The existing timestamping_enable() is a no-op because it applies
to the socket-related path that we are not verifying here
anymore. (but still leaving the code around hoping we can
have xdp->skb path verified here as well)

  poll: 1 (0)
  xsk_ring_cons__peek: 1
  0xf64788: rx_desc[0]->addr=100000000008000 addr=8100 comp_addr=8000
  rx_hash: 3697961069
  rx_timestamp:  1674657672142214773 (sec:1674657672.1422)
  XDP RX-time:   1674657709561774876 (sec:1674657709.5618) delta sec:37.4196
  AF_XDP time:   1674657709561871034 (sec:1674657709.5619) delta
sec:0.0001 (96.158 usec)
  0xf64788: complete idx=8 addr=8000

Also, maybe something to archive here, see [0] for Jesper's note
about NIC vs host clock delta.

0: https://lore.kernel.org/bpf/f3a116dc-1b14-3432-ad20-a36179ef0608@redhat.com/

v2:
- Restore original value (Martin)

Fixes: 297a3f124155 ("selftests/bpf: Simple program to dump XDP RX metadata")
Reported-by: Jesper Dangaard Brouer <jbrouer@redhat.com>
Tested-by: Jesper Dangaard Brouer <jbrouer@redhat.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230126225030.510629-1-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 45 ++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 0008f0f239e8..3823b1c499cc 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -24,6 +24,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/udp.h>
 #include <linux/sockios.h>
+#include <linux/net_tstamp.h>
 #include <sys/mman.h>
 #include <net/if.h>
 #include <poll.h>
@@ -278,13 +279,53 @@ static int rxq_num(const char *ifname)
 
 	ret = ioctl(fd, SIOCETHTOOL, &ifr);
 	if (ret < 0)
-		error(-1, errno, "socket");
+		error(-1, errno, "ioctl(SIOCETHTOOL)");
 
 	close(fd);
 
 	return ch.rx_count + ch.combined_count;
 }
 
+static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
+{
+	struct ifreq ifr = {
+		.ifr_data = (void *)cfg,
+	};
+	strcpy(ifr.ifr_name, ifname);
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0)
+		error(-1, errno, "socket");
+
+	ret = ioctl(fd, op, &ifr);
+	if (ret < 0)
+		error(-1, errno, "ioctl(%d)", op);
+
+	close(fd);
+}
+
+static struct hwtstamp_config saved_hwtstamp_cfg;
+static const char *saved_hwtstamp_ifname;
+
+static void hwtstamp_restore(void)
+{
+	hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
+}
+
+static void hwtstamp_enable(const char *ifname)
+{
+	struct hwtstamp_config cfg = {
+		.rx_filter = HWTSTAMP_FILTER_ALL,
+	};
+
+	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
+	saved_hwtstamp_ifname = strdup(ifname);
+	atexit(hwtstamp_restore);
+
+	hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
+}
+
 static void cleanup(void)
 {
 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
@@ -341,6 +382,8 @@ int main(int argc, char *argv[])
 
 	printf("rxq: %d\n", rxq);
 
+	hwtstamp_enable(ifname);
+
 	rx_xsk = malloc(sizeof(struct xsk) * rxq);
 	if (!rx_xsk)
 		error(-1, ENOMEM, "malloc");
-- 
cgit v1.2.3


From 16809afdcbad5fa45f34622f62873c7d7114cde5 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 27 Jan 2023 13:57:05 -0800
Subject: selftest/bpf: Make crashes more debuggable in test_progs

Reset stdio before printing verbose log of the SIGSEGV'ed test.
Otherwise, it's hard to understand what's going on in the cases like [0].

With the following patch applied:

	--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
	+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
	@@ -392,6 +392,11 @@ void test_xdp_metadata(void)
	 		       "generate freplace packet"))
	 		goto out;

	+
	+	ASSERT_EQ(1, 2, "oops");
	+	int *x = 0;
	+	*x = 1; /* die */
	+
	 	while (!retries--) {
	 		if (bpf_obj2->bss->called)
	 			break;

Before:

 #281     xdp_metadata:FAIL
Caught signal #11!
Stack trace:
./test_progs(crash_handler+0x1f)[0x55c919d98bcf]
/lib/x86_64-linux-gnu/libc.so.6(+0x3bf90)[0x7f36aea5df90]
./test_progs(test_xdp_metadata+0x1db0)[0x55c919d8c6d0]
./test_progs(+0x23b438)[0x55c919d9a438]
./test_progs(main+0x534)[0x55c919d99454]
/lib/x86_64-linux-gnu/libc.so.6(+0x2718a)[0x7f36aea4918a]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85)[0x7f36aea49245]
./test_progs(_start+0x21)[0x55c919b82ef1]

After:

test_xdp_metadata:PASS:ip netns add xdp_metadata 0 nsec
open_netns:PASS:malloc token 0 nsec
open_netns:PASS:open /proc/self/ns/net 0 nsec
open_netns:PASS:open netns fd 0 nsec
open_netns:PASS:setns 0 nsec
..
test_xdp_metadata:FAIL:oops unexpected oops: actual 1 != expected 2
 #281     xdp_metadata:FAIL
Caught signal #11!
Stack trace:
./test_progs(crash_handler+0x1f)[0x562714a76bcf]
/lib/x86_64-linux-gnu/libc.so.6(+0x3bf90)[0x7fa663f9cf90]
./test_progs(test_xdp_metadata+0x1db0)[0x562714a6a6d0]
./test_progs(+0x23b438)[0x562714a78438]
./test_progs(main+0x534)[0x562714a77454]
/lib/x86_64-linux-gnu/libc.so.6(+0x2718a)[0x7fa663f8818a]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x85)[0x7fa663f88245]
./test_progs(_start+0x21)[0x562714860ef1]

0: https://github.com/kernel-patches/bpf/actions/runs/4019879316/jobs/6907358876

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230127215705.1254316-1-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/test_progs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 4716e38e153a..c5f852163246 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -975,12 +975,12 @@ void crash_handler(int signum)
 
 	sz = backtrace(bt, ARRAY_SIZE(bt));
 
+	if (env.stdout)
+		stdio_restore();
 	if (env.test) {
 		env.test_state->error_cnt++;
 		dump_test_log(env.test, env.test_state, true, false);
 	}
-	if (env.stdout)
-		stdio_restore();
 	if (env.worker_id != -1)
 		fprintf(stderr, "[%d]: ", env.worker_id);
 	fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
-- 
cgit v1.2.3


From 8fb9fb2f1728a56a2a6dba79de6f17975def658d Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:22 +0100
Subject: selftests/bpf: Query BPF_MAX_TRAMP_LINKS using BTF

Do not hard-code the value, since for s390x it will be smaller than
for x86.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-4-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/fexit_stress.c        | 22 ++++++++-----
 .../selftests/bpf/prog_tests/trampoline_count.c    | 16 ++++++---
 tools/testing/selftests/bpf/test_progs.c           | 38 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h           |  2 ++
 4 files changed, 65 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 5a7e6011f6bf..596536def43d 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -2,14 +2,19 @@
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
 
-/* that's kernel internal BPF_MAX_TRAMP_PROGS define */
-#define CNT 38
-
 void serial_test_fexit_stress(void)
 {
-	int fexit_fd[CNT] = {};
-	int link_fd[CNT] = {};
-	int err, i;
+	int bpf_max_tramp_links, err, i;
+	int *fd, *fexit_fd, *link_fd;
+
+	bpf_max_tramp_links = get_bpf_max_tramp_links();
+	if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links"))
+		return;
+	fd = calloc(bpf_max_tramp_links * 2, sizeof(*fd));
+	if (!ASSERT_OK_PTR(fd, "fd"))
+		return;
+	fexit_fd = fd;
+	link_fd = fd + bpf_max_tramp_links;
 
 	const struct bpf_insn trace_program[] = {
 		BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -28,7 +33,7 @@ void serial_test_fexit_stress(void)
 		goto out;
 	trace_opts.attach_btf_id = err;
 
-	for (i = 0; i < CNT; i++) {
+	for (i = 0; i < bpf_max_tramp_links; i++) {
 		fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
 					    trace_program,
 					    sizeof(trace_program) / sizeof(struct bpf_insn),
@@ -44,10 +49,11 @@ void serial_test_fexit_stress(void)
 	ASSERT_OK(err, "bpf_prog_test_run_opts");
 
 out:
-	for (i = 0; i < CNT; i++) {
+	for (i = 0; i < bpf_max_tramp_links; i++) {
 		if (link_fd[i])
 			close(link_fd[i]);
 		if (fexit_fd[i])
 			close(fexit_fd[i]);
 	}
+	free(fd);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 564b75bc087f..8fd4c0d78089 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -2,8 +2,6 @@
 #define _GNU_SOURCE
 #include <test_progs.h>
 
-#define MAX_TRAMP_PROGS 38
-
 struct inst {
 	struct bpf_object *obj;
 	struct bpf_link   *link;
@@ -37,14 +35,21 @@ void serial_test_trampoline_count(void)
 {
 	char *file = "test_trampoline_count.bpf.o";
 	char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" };
-	struct inst inst[MAX_TRAMP_PROGS + 1] = {};
+	int bpf_max_tramp_links, err, i, prog_fd;
 	struct bpf_program *prog;
 	struct bpf_link *link;
-	int prog_fd, err, i;
+	struct inst *inst;
 	LIBBPF_OPTS(bpf_test_run_opts, opts);
 
+	bpf_max_tramp_links = get_bpf_max_tramp_links();
+	if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links"))
+		return;
+	inst = calloc(bpf_max_tramp_links + 1, sizeof(*inst));
+	if (!ASSERT_OK_PTR(inst, "inst"))
+		return;
+
 	/* attach 'allowed' trampoline programs */
-	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
+	for (i = 0; i < bpf_max_tramp_links; i++) {
 		prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]);
 		if (!prog)
 			goto cleanup;
@@ -91,4 +96,5 @@ cleanup:
 		bpf_link__destroy(inst[i].link);
 		bpf_object__close(inst[i].obj);
 	}
+	free(inst);
 }
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c5f852163246..6d5e3022c75f 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -17,6 +17,7 @@
 #include <sys/select.h>
 #include <sys/socket.h>
 #include <sys/un.h>
+#include <bpf/btf.h>
 
 static bool verbose(void)
 {
@@ -967,6 +968,43 @@ int write_sysctl(const char *sysctl, const char *value)
 	return 0;
 }
 
+int get_bpf_max_tramp_links_from(struct btf *btf)
+{
+	const struct btf_enum *e;
+	const struct btf_type *t;
+	__u32 i, type_cnt;
+	const char *name;
+	__u16 j, vlen;
+
+	for (i = 1, type_cnt = btf__type_cnt(btf); i < type_cnt; i++) {
+		t = btf__type_by_id(btf, i);
+		if (!t || !btf_is_enum(t) || t->name_off)
+			continue;
+		e = btf_enum(t);
+		for (j = 0, vlen = btf_vlen(t); j < vlen; j++, e++) {
+			name = btf__str_by_offset(btf, e->name_off);
+			if (name && !strcmp(name, "BPF_MAX_TRAMP_LINKS"))
+				return e->val;
+		}
+	}
+
+	return -1;
+}
+
+int get_bpf_max_tramp_links(void)
+{
+	struct btf *vmlinux_btf;
+	int ret;
+
+	vmlinux_btf = btf__load_vmlinux_btf();
+	if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux btf"))
+		return -1;
+	ret = get_bpf_max_tramp_links_from(vmlinux_btf);
+	btf__free(vmlinux_btf);
+
+	return ret;
+}
+
 #define MAX_BACKTRACE_SZ 128
 void crash_handler(int signum)
 {
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 3f058dfadbaf..d5d51ec97ec8 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -394,6 +394,8 @@ int kern_sync_rcu(void);
 int trigger_module_test_read(int read_sz);
 int trigger_module_test_write(int write_sz);
 int write_sysctl(const char *sysctl, const char *value);
+int get_bpf_max_tramp_links_from(struct btf *btf);
+int get_bpf_max_tramp_links(void);
 
 #ifdef __x86_64__
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
-- 
cgit v1.2.3


From b14b01f281f728f465d2440ce6ed0491df735169 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:23 +0100
Subject: selftests/bpf: Fix liburandom_read.so linker error

When building with O=, the following linker error occurs:

    clang: error: no such file or directory: 'liburandom_read.so'

Fix by adding $(OUTPUT) to the linker search path.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-5-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 53eae7be8dff..02d41af05fe1 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -189,7 +189,7 @@ $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
 $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
 	$(call msg,BINARY,,$@)
 	$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
-		     liburandom_read.so $(filter-out -static,$(LDLIBS))	     \
+		     -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT)  \
 		     -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
 		     -Wl,-rpath=. -o $@
 
-- 
cgit v1.2.3


From 6eab2370d142cdfe853d168881b0d4abd3c6f7a6 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:24 +0100
Subject: selftests/bpf: Fix symlink creation error

When building with O=, the following error occurs:

    ln: failed to create symbolic link 'no_alu32/bpftool': No such file or directory

Adjust the code to account for $(OUTPUT).

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-6-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 02d41af05fe1..e79039726173 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -519,7 +519,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 	$$(call msg,BINARY,,$$@)
 	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
 	$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
-	$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/bootstrap/bpftool $(if $2,$2/)bpftool
+	$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/bootstrap/bpftool \
+		   $(OUTPUT)/$(if $2,$2/)bpftool
 
 endef
 
-- 
cgit v1.2.3


From 31da9be64a1136e5699a1fc59a45001d6b98acdc Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:25 +0100
Subject: selftests/bpf: Fix kfree_skb on s390x

h_proto is big-endian; use htons() in order to make comparison work on
both little- and big-endian machines.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-7-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 73579370bfbd..c07991544a78 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -36,7 +36,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 		  "cb32_0 %x != %x\n",
 		  meta->cb32_0, cb.cb32[0]))
 		return;
-	if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth",
+	if (CHECK(pkt_v6->eth.h_proto != htons(ETH_P_IPV6), "check_eth",
 		  "h_proto %x\n", pkt_v6->eth.h_proto))
 		return;
 	if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip",
-- 
cgit v1.2.3


From 804acdd251e837bb1a588074752d69698ac36b5f Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:26 +0100
Subject: selftests/bpf: Set errno when urand_spawn() fails

The result of urand_spawn() is checked with ASSERT_OK_PTR, which treats
NULL as success if errno == 0.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-8-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/usdt.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 9ad9da0f215e..56ed1eb9b527 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -314,6 +314,7 @@ static FILE *urand_spawn(int *pid)
 
 	if (fscanf(f, "%d", pid) != 1) {
 		pclose(f);
+		errno = EINVAL;
 		return NULL;
 	}
 
-- 
cgit v1.2.3


From 98e13848cf43b66b0f32f10011aa398c2bff5ae6 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:27 +0100
Subject: selftests/bpf: Fix decap_sanity_ns cleanup

decap_sanity prints the following on the 1st run:

    decap_sanity: sh: 1: Syntax error: Bad fd number

and the following on the 2nd run:

    Cannot create namespace file "/run/netns/decap_sanity_ns": File exists

The problem is that the cleanup command has a typo and does nothing.
Fix the typo.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-9-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/decap_sanity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
index 0b2f73b88c53..2853883b7cbb 100644
--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -80,6 +80,6 @@ fail:
 		bpf_tc_hook_destroy(&qdisc_hook);
 		close_netns(nstoken);
 	}
-	system("ip netns del " NS_TEST " >& /dev/null");
+	system("ip netns del " NS_TEST " &> /dev/null");
 	decap_sanity__destroy(skel);
 }
-- 
cgit v1.2.3


From 56e1a50483194b2e0ac54849e94cc2b80480895e Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:28 +0100
Subject: selftests/bpf: Fix verify_pkcs7_sig on s390x

Use bpf_probe_read_kernel() instead of bpf_probe_read(), which is not
defined on all architectures.

While at it, improve the error handling: do not hide the verifier log,
and check the return values of bpf_probe_read_kernel() and
bpf_copy_from_user().

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-10-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c |  3 +++
 tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c | 12 ++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
index 579d6ee83ce0..dd7f2bc70048 100644
--- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -61,6 +61,9 @@ static bool kfunc_not_supported;
 static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
 			   va_list args)
 {
+	if (level == LIBBPF_WARN)
+		vprintf(fmt, args);
+
 	if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
 		return 0;
 
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index ce419304ff1f..7748cc23de8a 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -59,10 +59,14 @@ int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
 	if (!data_val)
 		return 0;
 
-	bpf_probe_read(&value, sizeof(value), &attr->value);
-
-	bpf_copy_from_user(data_val, sizeof(struct data),
-			   (void *)(unsigned long)value);
+	ret = bpf_probe_read_kernel(&value, sizeof(value), &attr->value);
+	if (ret)
+		return ret;
+
+	ret = bpf_copy_from_user(data_val, sizeof(struct data),
+				 (void *)(unsigned long)value);
+	if (ret)
+		return ret;
 
 	if (data_val->data_len > sizeof(data_val->data))
 		return -EINVAL;
-- 
cgit v1.2.3


From 06c1865b0b0c7820ea53af2394dd7aff31100295 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:29 +0100
Subject: selftests/bpf: Fix xdp_do_redirect on s390x

s390x cache line size is 256 bytes, so skb_shared_info must be aligned
on a much larger boundary than for x86. This makes the maximum packet
size smaller.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-11-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index a50971c6cf4a..ac70e871d62f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -65,7 +65,11 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
 /* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
  * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes
  */
+#if defined(__s390x__)
+#define MAX_PKT_SIZE 3176
+#else
 #define MAX_PKT_SIZE 3368
+#endif
 static void test_max_pkt_size(int fd)
 {
 	char data[MAX_PKT_SIZE + 1] = {};
-- 
cgit v1.2.3


From 06cea99e683c66e16cdf04ef91d2a008b34d7d3d Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:30 +0100
Subject: selftests/bpf: Fix cgrp_local_storage on s390x

Sync the definition of socket_cookie between the eBPF program and the
test. Currently the test works by accident, since on little-endian it
is sometimes acceptable to access u64 as u32.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-12-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
index 33a2776737e7..2cc759956e3b 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -16,7 +16,7 @@
 
 struct socket_cookie {
 	__u64 cookie_key;
-	__u32 cookie_value;
+	__u64 cookie_value;
 };
 
 static void test_tp_btf(int cgroup_fd)
-- 
cgit v1.2.3


From 2934565f04fd21319fd2dfcc68bebebff503ceb2 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:31 +0100
Subject: selftests/bpf: Check stack_mprotect() return value

If stack_mprotect() succeeds, errno is not changed. This can produce
misleading error messages, that show stale errno.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-13-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 6 ++++--
 tools/testing/selftests/bpf/prog_tests/test_lsm.c   | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 2be2d61954bc..26b2d1bffdfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -472,6 +472,7 @@ static void lsm_subtest(struct test_bpf_cookie *skel)
 	int prog_fd;
 	int lsm_fd = -1;
 	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+	int err;
 
 	skel->bss->lsm_res = 0;
 
@@ -482,8 +483,9 @@ static void lsm_subtest(struct test_bpf_cookie *skel)
 	if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create"))
 		goto cleanup;
 
-	stack_mprotect();
-	if (!ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+	err = stack_mprotect();
+	if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+	    !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
 		goto cleanup;
 
 	usleep(1);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 244c01125126..16175d579bc7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -75,7 +75,8 @@ static int test_lsm(struct lsm *skel)
 	skel->bss->monitored_pid = getpid();
 
 	err = stack_mprotect();
-	if (!ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+	if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+	    !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
 		return err;
 
 	ASSERT_EQ(skel->bss->mprotect_count, 1, "mprotect_count");
-- 
cgit v1.2.3


From 80a611904eef65e8c60e0c8c8f50fa98a0bd0c69 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:32 +0100
Subject: selftests/bpf: Increase SIZEOF_BPF_LOCAL_STORAGE_ELEM on s390x

sizeof(struct bpf_local_storage_elem) is 512 on s390x:

    struct bpf_local_storage_elem {
            struct hlist_node          map_node;             /*     0    16 */
            struct hlist_node          snode;                /*    16    16 */
            struct bpf_local_storage * local_storage;        /*    32     8 */
            struct callback_head       rcu __attribute__((__aligned__(8))); /*    40    16 */

            /* XXX 200 bytes hole, try to pack */

            /* --- cacheline 1 boundary (256 bytes) --- */
            struct bpf_local_storage_data sdata __attribute__((__aligned__(256))); /*   256     8 */

            /* size: 512, cachelines: 2, members: 5 */
            /* sum members: 64, holes: 1, sum holes: 200 */
            /* padding: 248 */
            /* forced alignments: 2, forced holes: 1, sum forced holes: 200 */
    } __attribute__((__aligned__(256)));

As the existing comment suggests, use a larger number in order to be
future-proof.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-14-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/netcnt_common.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
index 0ab1c88041cd..2d4a58e4e39c 100644
--- a/tools/testing/selftests/bpf/netcnt_common.h
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -8,11 +8,11 @@
 
 /* sizeof(struct bpf_local_storage_elem):
  *
- * It really is about 128 bytes on x86_64, but allocate more to account for
- * possible layout changes, different architectures, etc.
+ * It is about 128 bytes on x86_64 and 512 bytes on s390x, but allocate more to
+ * account for possible layout changes, different architectures, etc.
  * The kernel will wrap up to PAGE_SIZE internally anyway.
  */
-#define SIZEOF_BPF_LOCAL_STORAGE_ELEM		256
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM		768
 
 /* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
 #define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE	(0xFFFF - \
-- 
cgit v1.2.3


From be6b5c10ecc4014446e5c807d6a69c5a7cc1c497 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:33 +0100
Subject: selftests/bpf: Add a sign-extension test for kfuncs

s390x ABI requires the caller to zero- or sign-extend the arguments.
eBPF already deals with zero-extension (by definition of its ABI), but
not with sign-extension.

Add a test to cover that potentially problematic area.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-15-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                                  |  9 +++++++++
 tools/testing/selftests/bpf/prog_tests/kfunc_call.c |  1 +
 tools/testing/selftests/bpf/progs/kfunc_call_test.c | 18 ++++++++++++++++++
 3 files changed, 28 insertions(+)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 8da0d73b368e..7dbefa4fd2eb 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -550,6 +550,14 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
 	return sk;
 }
 
+long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
+{
+	/* Provoke the compiler to assume that the caller has sign-extended a,
+	 * b and c on platforms where this is required (e.g. s390x).
+	 */
+	return (long)a + (long)b + (long)c + d;
+}
+
 struct prog_test_member1 {
 	int a;
 };
@@ -746,6 +754,7 @@ BTF_SET8_START(test_sk_check_kfunc_ids)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 5af1ee8f0e6e..bb4cd82a788a 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -72,6 +72,7 @@ static struct kfunc_test_params kfunc_tests[] = {
 	/* success cases */
 	TC_TEST(kfunc_call_test1, 12),
 	TC_TEST(kfunc_call_test2, 3),
+	TC_TEST(kfunc_call_test4, -1234),
 	TC_TEST(kfunc_call_test_ref_btf_id, 0),
 	TC_TEST(kfunc_call_test_get_mem, 42),
 	SYSCALL_TEST(kfunc_syscall_test, 0),
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index f636e50be259..d91c58d06d38 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -3,6 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 
+extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
 extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
 				  __u32 c, __u64 d) __ksym;
@@ -17,6 +18,23 @@ extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
 extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
 extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
 
+SEC("tc")
+int kfunc_call_test4(struct __sk_buff *skb)
+{
+	struct bpf_sock *sk = skb->sk;
+	long tmp;
+
+	if (!sk)
+		return -1;
+
+	sk = bpf_sk_fullsock(sk);
+	if (!sk)
+		return -1;
+
+	tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000);
+	return (tmp >> 32) + tmp;
+}
+
 SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
 {
-- 
cgit v1.2.3


From 207612eb12b912cad40c0299fefcabf3d6cc1f3f Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:34 +0100
Subject: selftests/bpf: Fix test_lsm on s390x

Use syscall macros to access the setdomainname() arguments; currently
the code uses gprs[2] instead of orig_gpr2 for the first argument.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-16-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/lsm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index d8d8af623bc2..dc93887ed34c 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -6,9 +6,10 @@
 
 #include "bpf_misc.h"
 #include "vmlinux.h"
+#include <bpf/bpf_core_read.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include  <errno.h>
+#include <errno.h>
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -164,8 +165,8 @@ int copy_test = 0;
 SEC("fentry.s/" SYS_PREFIX "sys_setdomainname")
 int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
 {
-	void *ptr = (void *)PT_REGS_PARM1(regs);
-	int len = PT_REGS_PARM2(regs);
+	void *ptr = (void *)PT_REGS_PARM1_SYSCALL(regs);
+	int len = PT_REGS_PARM2_SYSCALL(regs);
 	int buf = 0;
 	long ret;
 
-- 
cgit v1.2.3


From 26e8a014947948387790a029b310276ac083971b Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:35 +0100
Subject: selftests/bpf: Fix test_xdp_adjust_tail_grow2 on s390x

s390x cache line size is 256 bytes, so skb_shared_info must be aligned
on a much larger boundary than for x86.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-17-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c      | 7 ++++++-
 tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c | 8 +++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 39973ea1ce43..f09505f8b038 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -76,10 +76,15 @@ static void test_xdp_adjust_tail_grow2(void)
 {
 	const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
 	char buf[4096]; /* avoid segfault: large buf to hold grow results */
-	int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
 	struct bpf_object *obj;
 	int err, cnt, i;
 	int max_grow, prog_fd;
+	/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
+#if defined(__s390x__)
+	int tailroom = 512;
+#else
+	int tailroom = 320;
+#endif
 
 	LIBBPF_OPTS(bpf_test_run_opts, tattr,
 		.repeat		= 1,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 53b64c999450..297c260fc364 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -9,6 +9,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 	void *data = (void *)(long)xdp->data;
 	int data_len = bpf_xdp_get_buff_len(xdp);
 	int offset = 0;
+	/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
+#if defined(__TARGET_ARCH_s390)
+	int tailroom = 512;
+#else
+	int tailroom = 320;
+#endif
 
 	/* Data length determine test case */
 
@@ -20,7 +26,7 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 		offset = 128;
 	} else if (data_len == 128) {
 		/* Max tail grow 3520 */
-		offset = 4096 - 256 - 320 - data_len;
+		offset = 4096 - 256 - tailroom - data_len;
 	} else if (data_len == 9000) {
 		offset = 10;
 	} else if (data_len == 9001) {
-- 
cgit v1.2.3


From d504270a233d2710cf9203b9ecec820736d0e042 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:36 +0100
Subject: selftests/bpf: Fix vmlinux test on s390x

Use a syscall macro to access the nanosleep()'s first argument;
currently the code uses gprs[2] instead of orig_gpr2.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-18-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_vmlinux.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index e9dfa0313d1b..4b8e37f7fd06 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -42,7 +42,7 @@ int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
 	if (id != __NR_nanosleep)
 		return 0;
 
-	ts = (void *)PT_REGS_PARM1_CORE(regs);
+	ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
 	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
 	    tv_nsec != MY_TV_NSEC)
 		return 0;
@@ -60,7 +60,7 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
 	if (id != __NR_nanosleep)
 		return 0;
 
-	ts = (void *)PT_REGS_PARM1_CORE(regs);
+	ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
 	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
 	    tv_nsec != MY_TV_NSEC)
 		return 0;
-- 
cgit v1.2.3


From 438a2edf26b7384d3c781ff98015d58135b848d5 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:38 +0100
Subject: selftests/bpf: Fix xdp_synproxy/tc on s390x

Use the correct datatype for the values map values; currently the test
works by accident, since on little-endian machines it is sometimes
acceptable to access u64 as u32.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-20-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 736686e903f6..07d786329105 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -310,7 +310,7 @@ static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale,
 static __always_inline void values_inc_synacks(void)
 {
 	__u32 key = 1;
-	__u32 *value;
+	__u64 *value;
 
 	value = bpf_map_lookup_elem(&values, &key);
 	if (value)
-- 
cgit v1.2.3


From 1b5e385325810b23e4e8d46f97d0e2e838e26802 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sat, 28 Jan 2023 01:06:39 +0100
Subject: selftests/bpf: Fix profiler on s390x

Use bpf_probe_read_kernel() and bpf_probe_read_kernel_str() instead
of bpf_probe_read() and bpf_probe_read_kernel().

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230128000650.1516334-21-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/profiler.inc.h | 62 +++++++++++++++---------
 1 file changed, 38 insertions(+), 24 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 92331053dba3..68a3fd7387a4 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -156,10 +156,10 @@ probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
 {
 	len = len < max ? len : max;
 	if (len > 1) {
-		if (bpf_probe_read(dst, len, src))
+		if (bpf_probe_read_kernel(dst, len, src))
 			return 0;
 	} else if (len == 1) {
-		if (bpf_probe_read(dst, 1, src))
+		if (bpf_probe_read_kernel(dst, 1, src))
 			return 0;
 	}
 	return len;
@@ -216,7 +216,8 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
 #endif
 	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
 		filepart_length =
-			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
+			bpf_probe_read_kernel_str(payload, MAX_PATH,
+						  BPF_CORE_READ(cgroup_node, name));
 		if (!cgroup_node)
 			return payload;
 		if (cgroup_node == cgroup_root_node)
@@ -303,7 +304,8 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 	cgroup_data->cgroup_full_length = 0;
 
 	size_t cgroup_root_length =
-		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
+		bpf_probe_read_kernel_str(payload, MAX_PATH,
+					  BPF_CORE_READ(root_kernfs, name));
 	barrier_var(cgroup_root_length);
 	if (cgroup_root_length <= MAX_PATH) {
 		barrier_var(cgroup_root_length);
@@ -312,7 +314,8 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 	}
 
 	size_t cgroup_proc_length =
-		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
+		bpf_probe_read_kernel_str(payload, MAX_PATH,
+					  BPF_CORE_READ(proc_kernfs, name));
 	barrier_var(cgroup_proc_length);
 	if (cgroup_proc_length <= MAX_PATH) {
 		barrier_var(cgroup_proc_length);
@@ -395,7 +398,8 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
 		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
 		if (arr_struct == NULL)
 			return 0;
-		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
+		bpf_probe_read_kernel(&arr_struct->array[0],
+				      sizeof(arr_struct->array[0]), kill_data);
 	} else {
 		int index = get_var_spid_index(arr_struct, spid);
 
@@ -409,8 +413,9 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
 #endif
 			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
 				if (arr_struct->array[i].meta.pid == 0) {
-					bpf_probe_read(&arr_struct->array[i],
-						       sizeof(arr_struct->array[i]), kill_data);
+					bpf_probe_read_kernel(&arr_struct->array[i],
+							      sizeof(arr_struct->array[i]),
+							      kill_data);
 					bpf_map_update_elem(&var_tpid_to_data, &tpid,
 							    arr_struct, 0);
 
@@ -427,17 +432,17 @@ static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
 		if (delta_sec < STALE_INFO) {
 			kill_data->kill_count++;
 			kill_data->last_kill_time = bpf_ktime_get_ns();
-			bpf_probe_read(&arr_struct->array[index],
-				       sizeof(arr_struct->array[index]),
-				       kill_data);
+			bpf_probe_read_kernel(&arr_struct->array[index],
+					      sizeof(arr_struct->array[index]),
+					      kill_data);
 		} else {
 			struct var_kill_data_t* kill_data =
 				get_var_kill_data(ctx, spid, tpid, sig);
 			if (kill_data == NULL)
 				return 0;
-			bpf_probe_read(&arr_struct->array[index],
-				       sizeof(arr_struct->array[index]),
-				       kill_data);
+			bpf_probe_read_kernel(&arr_struct->array[index],
+					      sizeof(arr_struct->array[index]),
+					      kill_data);
 		}
 	}
 	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
@@ -487,8 +492,9 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
 #pragma unroll
 #endif
 	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
-		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
-						     BPF_CORE_READ(filp_dentry, d_name.name));
+		filepart_length =
+			bpf_probe_read_kernel_str(payload, MAX_PATH,
+						  BPF_CORE_READ(filp_dentry, d_name.name));
 		barrier_var(filepart_length);
 		if (filepart_length > MAX_PATH)
 			break;
@@ -572,7 +578,8 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write,
 	sysctl_data->sysctl_val_length = 0;
 	sysctl_data->sysctl_path_length = 0;
 
-	size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
+	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
+							     CTL_MAXNAME, buf);
 	barrier_var(sysctl_val_length);
 	if (sysctl_val_length <= CTL_MAXNAME) {
 		barrier_var(sysctl_val_length);
@@ -580,8 +587,10 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write,
 		payload += sysctl_val_length;
 	}
 
-	size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
-						       BPF_CORE_READ(filp, f_path.dentry, d_name.name));
+	size_t sysctl_path_length =
+		bpf_probe_read_kernel_str(payload, MAX_PATH,
+					  BPF_CORE_READ(filp, f_path.dentry,
+							d_name.name));
 	barrier_var(sysctl_path_length);
 	if (sysctl_path_length <= MAX_PATH) {
 		barrier_var(sysctl_path_length);
@@ -638,7 +647,8 @@ int raw_tracepoint__sched_process_exit(void* ctx)
 		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
 
 		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
-			bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
+			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
+					      past_kill_data);
 			void* payload = kill_data->payload;
 			size_t offset = kill_data->payload_length;
 			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
@@ -656,8 +666,10 @@ int raw_tracepoint__sched_process_exit(void* ctx)
 				payload += comm_length;
 			}
 
-			size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
-								       BPF_CORE_READ(proc_kernfs, name));
+			size_t cgroup_proc_length =
+				bpf_probe_read_kernel_str(payload,
+							  KILL_TARGET_LEN,
+							  BPF_CORE_READ(proc_kernfs, name));
 			barrier_var(cgroup_proc_length);
 			if (cgroup_proc_length <= KILL_TARGET_LEN) {
 				barrier_var(cgroup_proc_length);
@@ -718,7 +730,8 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
 	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
 
 	const char* filename = BPF_CORE_READ(bprm, filename);
-	size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
+	size_t bin_path_length =
+		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
 	barrier_var(bin_path_length);
 	if (bin_path_length <= MAX_FILENAME_LEN) {
 		barrier_var(bin_path_length);
@@ -922,7 +935,8 @@ int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
 					      filemod_data->payload);
 	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
 
-	size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
+	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
+					       oldname);
 	barrier_var(len);
 	if (len <= MAX_FILEPATH_LENGTH) {
 		barrier_var(len);
-- 
cgit v1.2.3


From 7ce878ca81bca7811e669db4c394b86780e0dbe4 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sun, 29 Jan 2023 20:04:54 +0100
Subject: selftests/bpf: Fix sk_assign on s390x

sk_assign is failing on an s390x machine running Debian "bookworm" for
2 reasons: legacy server_map definition and uninitialized addrlen in
recvfrom() call.

Fix by adding a new-style server_map definition and dropping addrlen
(recvfrom() allows NULL values for src_addr and addrlen).

Since the test should support tc built without libbpf, build the prog
twice: with the old-style definition and with the new-style definition,
then select the right one at runtime. This could be done at compile
time too, but this would not be cross-compilation friendly.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230129190501.1624747-2-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/sk_assign.c | 25 ++++++++++++++++------
 tools/testing/selftests/bpf/progs/test_sk_assign.c | 11 ++++++++++
 .../selftests/bpf/progs/test_sk_assign_libbpf.c    |  3 +++
 3 files changed, 33 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 3e190ed63976..1374b626a985 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -29,7 +29,23 @@ static int stop, duration;
 static bool
 configure_stack(void)
 {
+	char tc_version[128];
 	char tc_cmd[BUFSIZ];
+	char *prog;
+	FILE *tc;
+
+	/* Check whether tc is built with libbpf. */
+	tc = popen("tc -V", "r");
+	if (CHECK_FAIL(!tc))
+		return false;
+	if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
+		return false;
+	if (strstr(tc_version, ", libbpf "))
+		prog = "test_sk_assign_libbpf.bpf.o";
+	else
+		prog = "test_sk_assign.bpf.o";
+	if (CHECK_FAIL(pclose(tc)))
+		return false;
 
 	/* Move to a new networking namespace */
 	if (CHECK_FAIL(unshare(CLONE_NEWNET)))
@@ -46,8 +62,8 @@ configure_stack(void)
 	/* Load qdisc, BPF program */
 	if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
 		return false;
-	sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
-		       "direct-action object-file ./test_sk_assign.bpf.o",
+	sprintf(tc_cmd, "%s %s %s %s %s", "tc filter add dev lo ingress bpf",
+		       "direct-action object-file", prog,
 		       "section tc",
 		       (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
 	if (CHECK(system(tc_cmd), "BPF load failed;",
@@ -129,15 +145,12 @@ get_port(int fd)
 static ssize_t
 rcv_msg(int srv_client, int type)
 {
-	struct sockaddr_storage ss;
 	char buf[BUFSIZ];
-	socklen_t slen;
 
 	if (type == SOCK_STREAM)
 		return read(srv_client, &buf, sizeof(buf));
 	else
-		return recvfrom(srv_client, &buf, sizeof(buf), 0,
-				(struct sockaddr *)&ss, &slen);
+		return recvfrom(srv_client, &buf, sizeof(buf), 0, NULL, NULL);
 }
 
 static int
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 98c6493d9b91..21b19b758c4e 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -16,6 +16,16 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
+#if defined(IPROUTE2_HAVE_LIBBPF)
+/* Use a new-style map definition. */
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__type(key, int);
+	__type(value, __u64);
+	__uint(pinning, LIBBPF_PIN_BY_NAME);
+	__uint(max_entries, 1);
+} server_map SEC(".maps");
+#else
 /* Pin map under /sys/fs/bpf/tc/globals/<map name> */
 #define PIN_GLOBAL_NS 2
 
@@ -35,6 +45,7 @@ struct {
 	.max_elem = 1,
 	.pinning = PIN_GLOBAL_NS,
 };
+#endif
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
new file mode 100644
index 000000000000..dcf46adfda04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define IPROUTE2_HAVE_LIBBPF
+#include "test_sk_assign.c"
-- 
cgit v1.2.3


From af320fb7ddb091a401a407bb256a9abd85587624 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sun, 29 Jan 2023 20:05:00 +0100
Subject: selftests/bpf: Fix s390x vmlinux path

After commit edd4a8667355 ("s390/boot: get rid of startup archive")
there is no more compressed/ subdirectory.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230129190501.1624747-8-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/vmtest.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 316a56d680f2..685034528018 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -13,7 +13,7 @@ s390x)
 	QEMU_BINARY=qemu-system-s390x
 	QEMU_CONSOLE="ttyS1"
 	QEMU_FLAGS=(-smp 2)
-	BZIMAGE="arch/s390/boot/compressed/vmlinux"
+	BZIMAGE="arch/s390/boot/vmlinux"
 	;;
 x86_64)
 	QEMU_BINARY=qemu-system-x86_64
-- 
cgit v1.2.3


From ee105d5a50d4075be81b2d924bfc630e0d7cdb90 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Sun, 29 Jan 2023 20:05:01 +0100
Subject: selftests/bpf: Trim DENYLIST.s390x

Now that trampoline is implemented, enable a number of tests on s390x.
18 of the remaining failures have to do with either lack of rethook
(fixed by [1]) or syscall symbols missing from BTF (fixed by [2]).

Do not re-classify the remaining failures for now; wait until the
s390/for-next fixes are merged and re-classify only the remaining few.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=for-next&id=1a280f48c0e403903cf0b4231c95b948e664f25a
[2] https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=for-next&id=2213d44e140f979f4b60c3c0f8dd56d151cc8692

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230129190501.1624747-9-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x | 69 ------------------------------
 1 file changed, 69 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 50924611e5bb..b89eb87034e4 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,93 +1,24 @@
 # TEMPORARY
 # Alphabetical order
-atomics                                  # attach(add): actual -524 <= expected 0                                      (trampoline)
 bloom_filter_map                         # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3                (?)
 bpf_cookie                               # failed to open_and_load program: -524 (trampoline)
-bpf_iter_setsockopt                      # JIT does not support calling kernel function                                (kfunc)
 bpf_loop                                 # attaches to __x64_sys_nanosleep
-bpf_mod_race                             # BPF trampoline
-bpf_nf                                   # JIT does not support calling kernel function
-bpf_tcp_ca                               # JIT does not support calling kernel function                                (kfunc)
-cb_refs                                  # expected error message unexpected error: -524                               (trampoline)
-cgroup_hierarchical_stats                # JIT does not support calling kernel function                                (kfunc)
-cgrp_kfunc                               # JIT does not support calling kernel function
 cgrp_local_storage                       # prog_attach unexpected error: -524                                          (trampoline)
-core_read_macros                         # unknown func bpf_probe_read#4                                               (overlapping)
-cpumask                                  # JIT does not support calling kernel function
-d_path                                   # failed to auto-attach program 'prog_stat': -524                             (trampoline)
-decap_sanity                             # JIT does not support calling kernel function                                (kfunc)
-deny_namespace                           # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
-dummy_st_ops                             # test_run unexpected error: -524 (errno 524)                                 (trampoline)
-fentry_fexit                             # fentry attach failed: -524                                                  (trampoline)
-fentry_test                              # fentry_first_attach unexpected error: -524                                  (trampoline)
-fexit_bpf2bpf                            # freplace_attach_trace unexpected error: -524                                (trampoline)
 fexit_sleep                              # fexit_skel_load fexit skeleton failed                                       (trampoline)
-fexit_stress                             # fexit attach failed prog 0 failed: -524                                     (trampoline)
-fexit_test                               # fexit_first_attach unexpected error: -524                                   (trampoline)
-get_func_args_test	                 # trampoline
-get_func_ip_test                         # get_func_ip_test__attach unexpected error: -524                             (trampoline)
 get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
-htab_update                              # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
-jit_probe_mem                            # jit_probe_mem__open_and_load unexpected error: -524                         (kfunc)
-kfree_skb                                # attach fentry unexpected error: -524                                        (trampoline)
-kfunc_call                               # 'bpf_prog_active': not found in kernel BTF                                  (?)
-kfunc_dynptr_param                       # JIT does not support calling kernel function                                (kfunc)
 kprobe_multi_bench_attach                # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 kprobe_multi_test                        # relies on fentry
 ksyms_module                             # test_ksyms_module__open_and_load unexpected error: -9                       (?)
 ksyms_module_libbpf                      # JIT does not support calling kernel function                                (kfunc)
 ksyms_module_lskel                       # test_ksyms_module_lskel__open_and_load unexpected error: -9                 (?)
-libbpf_get_fd_by_id_opts                 # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
-linked_list				 # JIT does not support calling kernel function                                (kfunc)
-lookup_key                               # JIT does not support calling kernel function                                (kfunc)
-lru_bug                                  # prog 'printk': failed to auto-attach: -524
-map_kptr                                 # failed to open_and_load program: -524 (trampoline)
-modify_return                            # modify_return attach failed: -524                                           (trampoline)
 module_attach                            # skel_attach skeleton attach failed: -524                                    (trampoline)
-mptcp
-nested_trust                             # JIT does not support calling kernel function
-netcnt                                   # failed to load BPF skeleton 'netcnt_prog': -7                               (?)
-probe_user                               # check_kprobe_res wrong kprobe res from probe read                           (?)
-rcu_read_lock                            # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3                (?)
-recursion                                # skel_attach unexpected error: -524                                          (trampoline)
 ringbuf                                  # skel_load skeleton load failed                                              (?)
-select_reuseport                         # intermittently fails on new s390x setup
-send_signal                              # intermittently fails to receive signal
-setget_sockopt                           # attach unexpected error: -524                                               (trampoline)
-sk_assign                                # Can't read on server: Invalid argument                                      (?)
-sk_lookup                                # endianness problem
-sk_storage_tracing                       # test_sk_storage_tracing__attach unexpected error: -524                      (trampoline)
-skc_to_unix_sock                         # could not attach BPF object unexpected error: -524                          (trampoline)
-socket_cookie                            # prog_attach unexpected error: -524                                          (trampoline)
 stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
-tailcalls                                # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls      (?)
-task_kfunc                               # JIT does not support calling kernel function
-task_local_storage                       # failed to auto-attach program 'trace_exit_creds': -524                      (trampoline)
-test_bpffs                               # bpffs test  failed 255                                                      (iterator)
-test_bprm_opts                           # failed to auto-attach program 'secure_exec': -524                           (trampoline)
-test_ima                                 # failed to auto-attach program 'ima': -524                                   (trampoline)
-test_local_storage                       # failed to auto-attach program 'unlink_hook': -524                           (trampoline)
 test_lsm                                 # attach unexpected error: -524                                               (trampoline)
-test_overhead                            # attach_fentry unexpected error: -524                                        (trampoline)
-test_profiler                            # unknown func bpf_probe_read_str#45                                          (overlapping)
-timer                                    # failed to auto-attach program 'test1': -524                                 (trampoline)
-timer_crash                              # trampoline
-timer_mim                                # failed to auto-attach program 'test1': -524                                 (trampoline)
-trace_ext                                # failed to auto-attach program 'test_pkt_md_access_new': -524                (trampoline)
 trace_printk                             # trace_printk__load unexpected error: -2 (errno 2)                           (?)
 trace_vprintk                            # trace_vprintk__open_and_load unexpected error: -9                           (?)
-tracing_struct                           # failed to auto-attach: -524                                                 (trampoline)
-trampoline_count                         # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22                  (trampoline)
-type_cast                                # JIT does not support calling kernel function
 unpriv_bpf_disabled                      # fentry
 user_ringbuf                             # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3                (?)
 verif_stats                              # trace_vprintk__open_and_load unexpected error: -9                           (?)
-verify_pkcs7_sig                         # JIT does not support calling kernel function                                (kfunc)
-vmlinux                                  # failed to auto-attach program 'handle__fentry': -524                        (trampoline)
-xdp_adjust_tail                          # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520                  (?)
 xdp_bonding                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
-xdp_bpf2bpf                              # failed to auto-attach program 'trace_on_entry': -524                        (trampoline)
-xdp_do_redirect                          # prog_run_max_size unexpected error: -22 (errno 22)
 xdp_metadata                             # JIT does not support calling kernel function                                (kfunc)
-xdp_synproxy                             # JIT does not support calling kernel function                                (kfunc)
-xfrm_info                                # JIT does not support calling kernel function                                (kfunc)
-- 
cgit v1.2.3


From 1680801ef64d12b44048e0a35a913d7b13d78be0 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 30 Jan 2023 17:40:01 +0100
Subject: selftests: mlxsw: qos_dscp_bridge: Convert from lldptool to dcb

Set up DSCP prioritization through the iproute2 dcb tool, which is easier
to understand and manage.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Danielle Ratson <danieller@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/qos_dscp_bridge.sh | 23 +++++-----------------
 1 file changed, 5 insertions(+), 18 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 28a570006d4d..87c41f5727c9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -20,7 +20,7 @@
 # | SW |                                                                |     |
 # |  +-|----------------------------------------------------------------|-+   |
 # |  | + $swp1                       BR                           $swp2 + |   |
-# |  |   APP=0,5,10 .. 7,5,17                      APP=0,5,20 .. 7,5,27   |   |
+# |  |   dcb dscp-prio 10:0...17:7            dcb dscp-prio 20:0...27:7   |   |
 # |  +--------------------------------------------------------------------+   |
 # +---------------------------------------------------------------------------+
 
@@ -62,16 +62,6 @@ h2_destroy()
 	simple_if_fini $h2 192.0.2.2/28
 }
 
-dscp_map()
-{
-	local base=$1; shift
-	local prio
-
-	for prio in {0..7}; do
-		echo app=$prio,5,$((base + prio))
-	done
-}
-
 switch_create()
 {
 	ip link add name br1 type bridge vlan_filtering 1
@@ -81,17 +71,14 @@ switch_create()
 	ip link set dev $swp2 master br1
 	ip link set dev $swp2 up
 
-	lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
-	lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
-	lldpad_app_wait_set $swp1
-	lldpad_app_wait_set $swp2
+	dcb app add dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
+	dcb app add dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
 }
 
 switch_destroy()
 {
-	lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
-	lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
-	lldpad_app_wait_del
+	dcb app del dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
+	dcb app del dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
 
 	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
-- 
cgit v1.2.3


From 10d5bd0b69edb5af7933ba76dd23af3ff9d63b29 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 30 Jan 2023 17:40:02 +0100
Subject: selftests: mlxsw: qos_dscp_router: Convert from lldptool to dcb

Set up DSCP prioritization through the iproute2 dcb tool, which is easier
to understand and manage.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Danielle Ratson <danieller@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/qos_dscp_router.sh | 27 +++++-----------------
 1 file changed, 6 insertions(+), 21 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
index 4cb2aa65278a..f6c23f84423e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -94,16 +94,6 @@ h2_destroy()
 	simple_if_fini $h2 192.0.2.18/28
 }
 
-dscp_map()
-{
-	local base=$1; shift
-	local prio
-
-	for prio in {0..7}; do
-		echo app=$prio,5,$((base + prio))
-	done
-}
-
 switch_create()
 {
 	simple_if_init $swp1 192.0.2.2/28
@@ -112,17 +102,14 @@ switch_create()
 	tc qdisc add dev $swp1 clsact
 	tc qdisc add dev $swp2 clsact
 
-	lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
-	lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
-	lldpad_app_wait_set $swp1
-	lldpad_app_wait_set $swp2
+	dcb app add dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+	dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
 }
 
 switch_destroy()
 {
-	lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
-	lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
-	lldpad_app_wait_del
+	dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+	dcb app del dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
 
 	tc qdisc del dev $swp2 clsact
 	tc qdisc del dev $swp1 clsact
@@ -265,13 +252,11 @@ test_dscp_leftover()
 {
 	echo "Test that last removed DSCP rule is deconfigured correctly"
 
-	lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
-	lldpad_app_wait_del
+	dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
 
 	__test_update 0 zero
 
-	lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
-	lldpad_app_wait_set $swp2
+	dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
 }
 
 trap cleanup EXIT
-- 
cgit v1.2.3


From 5b3ef0452c593b3439278cbc919119c770786f25 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 30 Jan 2023 17:40:03 +0100
Subject: selftests: mlxsw: qos_defprio: Convert from lldptool to dcb

Set up default port priority through the iproute2 dcb tool, which is easier
to understand and manage.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Danielle Ratson <danieller@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/qos_defprio.sh     | 68 +++++-----------------
 1 file changed, 16 insertions(+), 52 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
index 71066bc4b886..5492fa5550d7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -5,18 +5,18 @@
 # prioritized according to the default priority specified at the port.
 # rx_octets_prio_* counters are used to verify the prioritization.
 #
-# +-----------------------+
-# | H1                    |
-# |    + $h1              |
-# |    | 192.0.2.1/28     |
-# +----|------------------+
+# +----------------------------------+
+# | H1                               |
+# |    + $h1                         |
+# |    | 192.0.2.1/28                |
+# +----|-----------------------------+
 #      |
-# +----|------------------+
-# | SW |                  |
-# |    + $swp1            |
-# |      192.0.2.2/28     |
-# |      APP=<prio>,1,0   |
-# +-----------------------+
+# +----|-----------------------------+
+# | SW |                             |
+# |    + $swp1                       |
+# |      192.0.2.2/28                |
+# |      dcb app default-prio <prio> |
+# +----------------------------------+
 
 ALL_TESTS="
 	ping_ipv4
@@ -29,42 +29,6 @@ NUM_NETIFS=2
 : ${HIT_TIMEOUT:=1000} # ms
 source $lib_dir/lib.sh
 
-declare -a APP
-
-defprio_install()
-{
-	local dev=$1; shift
-	local prio=$1; shift
-	local app="app=$prio,1,0"
-
-	lldptool -T -i $dev -V APP $app >/dev/null
-	lldpad_app_wait_set $dev
-	APP[$prio]=$app
-}
-
-defprio_uninstall()
-{
-	local dev=$1; shift
-	local prio=$1; shift
-	local app=${APP[$prio]}
-
-	lldptool -T -i $dev -V APP -d $app >/dev/null
-	lldpad_app_wait_del
-	unset APP[$prio]
-}
-
-defprio_flush()
-{
-	local dev=$1; shift
-	local prio
-
-	if ((${#APP[@]})); then
-		lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null
-	fi
-	lldpad_app_wait_del
-	APP=()
-}
-
 h1_create()
 {
 	simple_if_init $h1 192.0.2.1/28
@@ -83,7 +47,7 @@ switch_create()
 
 switch_destroy()
 {
-	defprio_flush $swp1
+	dcb app flush dev $swp1 default-prio
 	ip addr del dev $swp1 192.0.2.2/28
 	ip link set dev $swp1 down
 }
@@ -124,7 +88,7 @@ __test_defprio()
 
 	RET=0
 
-	defprio_install $swp1 $prio_install
+	dcb app add dev $swp1 default-prio $prio_install
 
 	local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
 	mausezahn -q $h1 -d 100m -c 10 -t arp reply
@@ -134,7 +98,7 @@ __test_defprio()
 	check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
 	log_test "Default priority $prio_install/$prio_observe"
 
-	defprio_uninstall $swp1 $prio_install
+	dcb app del dev $swp1 default-prio $prio_install
 }
 
 test_defprio()
@@ -145,7 +109,7 @@ test_defprio()
 		__test_defprio $prio $prio
 	done
 
-	defprio_install $swp1 3
+	dcb app add dev $swp1 default-prio 3
 	__test_defprio 0 3
 	__test_defprio 1 3
 	__test_defprio 2 3
@@ -153,7 +117,7 @@ test_defprio()
 	__test_defprio 5 5
 	__test_defprio 6 6
 	__test_defprio 7 7
-	defprio_uninstall $swp1 3
+	dcb app del dev $swp1 default-prio 3
 }
 
 trap cleanup EXIT
-- 
cgit v1.2.3


From bd32ff68721c3c1c5943f5493b62aae99190af36 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 30 Jan 2023 17:40:04 +0100
Subject: selftests: net: forwarding: lib: Drop lldpad_app_wait_set(), _del()

The existing users of these helpers have been converted to iproute2 dcb.
Drop the helpers.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Danielle Ratson <danieller@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/lib.sh | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1c4f866de7d7..29cd4705c752 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -524,27 +524,6 @@ cmd_jq()
 	[ ! -z "$output" ]
 }
 
-lldpad_app_wait_set()
-{
-	local dev=$1; shift
-
-	while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
-		echo "$dev: waiting for lldpad to push pending APP updates"
-		sleep 5
-	done
-}
-
-lldpad_app_wait_del()
-{
-	# Give lldpad a chance to push down the changes. If the device is downed
-	# too soon, the updates will be left pending. However, they will have
-	# been struck off the lldpad's DB already, so we won't be able to tell
-	# they are pending. Then on next test iteration this would cause
-	# weirdness as newly-added APP rules conflict with the old ones,
-	# sometimes getting stuck in an "unknown" state.
-	sleep 5
-}
-
 pre_cleanup()
 {
 	if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
-- 
cgit v1.2.3


From 400031e05adfcef9e80eca80bdfc3f4b63658be4 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 1 Feb 2023 11:30:15 -0600
Subject: bpf: Add __bpf_kfunc tag to all kfuncs

Now that we have the __bpf_kfunc tag, we should use add it to all
existing kfuncs to ensure that they'll never be elided in LTO builds.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20230201173016.342758-4-void@manifault.com
---
 kernel/bpf/cpumask.c                               | 60 +++++++++++-----------
 kernel/bpf/helpers.c                               | 38 +++++++-------
 kernel/cgroup/rstat.c                              |  4 +-
 kernel/kexec_core.c                                |  3 +-
 kernel/trace/bpf_trace.c                           |  8 +--
 net/bpf/test_run.c                                 | 55 ++++++++++----------
 net/core/xdp.c                                     |  5 +-
 net/ipv4/tcp_bbr.c                                 | 16 +++---
 net/ipv4/tcp_cong.c                                | 10 ++--
 net/ipv4/tcp_cubic.c                               | 12 ++---
 net/ipv4/tcp_dctcp.c                               | 12 ++---
 net/netfilter/nf_conntrack_bpf.c                   | 20 ++++----
 net/netfilter/nf_nat_bpf.c                         |  6 +--
 net/xfrm/xfrm_interface_bpf.c                      |  7 +--
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        |  2 +-
 15 files changed, 130 insertions(+), 128 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 6bbb67dfc998..52b981512a35 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -48,7 +48,7 @@ __diag_ignore_all("-Wmissing-prototypes",
  * bpf_cpumask_create() allocates memory using the BPF memory allocator, and
  * will not block. It may return NULL if no memory is available.
  */
-struct bpf_cpumask *bpf_cpumask_create(void)
+__bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void)
 {
 	struct bpf_cpumask *cpumask;
 
@@ -74,7 +74,7 @@ struct bpf_cpumask *bpf_cpumask_create(void)
  * must either be embedded in a map as a kptr, or freed with
  * bpf_cpumask_release().
  */
-struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
+__bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
 {
 	refcount_inc(&cpumask->usage);
 	return cpumask;
@@ -90,7 +90,7 @@ struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
  * kptr, or freed with bpf_cpumask_release(). This function may return NULL if
  * no BPF cpumask was found in the specified map value.
  */
-struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
+__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
 {
 	struct bpf_cpumask *cpumask;
 
@@ -116,7 +116,7 @@ struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
  * reference of the BPF cpumask has been released, it is subsequently freed in
  * an RCU callback in the BPF memory allocator.
  */
-void bpf_cpumask_release(struct bpf_cpumask *cpumask)
+__bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
 {
 	if (!cpumask)
 		return;
@@ -135,7 +135,7 @@ void bpf_cpumask_release(struct bpf_cpumask *cpumask)
  * Find the index of the first nonzero bit of the cpumask. A struct bpf_cpumask
  * pointer may be safely passed to this function.
  */
-u32 bpf_cpumask_first(const struct cpumask *cpumask)
+__bpf_kfunc u32 bpf_cpumask_first(const struct cpumask *cpumask)
 {
 	return cpumask_first(cpumask);
 }
@@ -148,7 +148,7 @@ u32 bpf_cpumask_first(const struct cpumask *cpumask)
  * Find the index of the first unset bit of the cpumask. A struct bpf_cpumask
  * pointer may be safely passed to this function.
  */
-u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
+__bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
 {
 	return cpumask_first_zero(cpumask);
 }
@@ -158,7 +158,7 @@ u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
  * @cpu: The CPU to be set in the cpumask.
  * @cpumask: The BPF cpumask in which a bit is being set.
  */
-void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+__bpf_kfunc void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
 {
 	if (!cpu_valid(cpu))
 		return;
@@ -171,7 +171,7 @@ void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
  * @cpu: The CPU to be cleared from the cpumask.
  * @cpumask: The BPF cpumask in which a bit is being cleared.
  */
-void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+__bpf_kfunc void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
 {
 	if (!cpu_valid(cpu))
 		return;
@@ -188,7 +188,7 @@ void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
  * * true  - @cpu is set in the cpumask
  * * false - @cpu was not set in the cpumask, or @cpu is an invalid cpu.
  */
-bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask)
+__bpf_kfunc bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask)
 {
 	if (!cpu_valid(cpu))
 		return false;
@@ -205,7 +205,7 @@ bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask)
  * * true  - @cpu is set in the cpumask
  * * false - @cpu was not set in the cpumask, or @cpu is invalid.
  */
-bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+__bpf_kfunc bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
 {
 	if (!cpu_valid(cpu))
 		return false;
@@ -223,7 +223,7 @@ bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
  * * true  - @cpu is set in the cpumask
  * * false - @cpu was not set in the cpumask, or @cpu is invalid.
  */
-bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+__bpf_kfunc bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
 {
 	if (!cpu_valid(cpu))
 		return false;
@@ -235,7 +235,7 @@ bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
  * bpf_cpumask_setall() - Set all of the bits in a BPF cpumask.
  * @cpumask: The BPF cpumask having all of its bits set.
  */
-void bpf_cpumask_setall(struct bpf_cpumask *cpumask)
+__bpf_kfunc void bpf_cpumask_setall(struct bpf_cpumask *cpumask)
 {
 	cpumask_setall((struct cpumask *)cpumask);
 }
@@ -244,7 +244,7 @@ void bpf_cpumask_setall(struct bpf_cpumask *cpumask)
  * bpf_cpumask_clear() - Clear all of the bits in a BPF cpumask.
  * @cpumask: The BPF cpumask being cleared.
  */
-void bpf_cpumask_clear(struct bpf_cpumask *cpumask)
+__bpf_kfunc void bpf_cpumask_clear(struct bpf_cpumask *cpumask)
 {
 	cpumask_clear((struct cpumask *)cpumask);
 }
@@ -261,9 +261,9 @@ void bpf_cpumask_clear(struct bpf_cpumask *cpumask)
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-bool bpf_cpumask_and(struct bpf_cpumask *dst,
-		     const struct cpumask *src1,
-		     const struct cpumask *src2)
+__bpf_kfunc bool bpf_cpumask_and(struct bpf_cpumask *dst,
+				 const struct cpumask *src1,
+				 const struct cpumask *src2)
 {
 	return cpumask_and((struct cpumask *)dst, src1, src2);
 }
@@ -276,9 +276,9 @@ bool bpf_cpumask_and(struct bpf_cpumask *dst,
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-void bpf_cpumask_or(struct bpf_cpumask *dst,
-		    const struct cpumask *src1,
-		    const struct cpumask *src2)
+__bpf_kfunc void bpf_cpumask_or(struct bpf_cpumask *dst,
+				const struct cpumask *src1,
+				const struct cpumask *src2)
 {
 	cpumask_or((struct cpumask *)dst, src1, src2);
 }
@@ -291,9 +291,9 @@ void bpf_cpumask_or(struct bpf_cpumask *dst,
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-void bpf_cpumask_xor(struct bpf_cpumask *dst,
-		     const struct cpumask *src1,
-		     const struct cpumask *src2)
+__bpf_kfunc void bpf_cpumask_xor(struct bpf_cpumask *dst,
+				 const struct cpumask *src1,
+				 const struct cpumask *src2)
 {
 	cpumask_xor((struct cpumask *)dst, src1, src2);
 }
@@ -309,7 +309,7 @@ void bpf_cpumask_xor(struct bpf_cpumask *dst,
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2)
+__bpf_kfunc bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2)
 {
 	return cpumask_equal(src1, src2);
 }
@@ -325,7 +325,7 @@ bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2)
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2)
+__bpf_kfunc bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2)
 {
 	return cpumask_intersects(src1, src2);
 }
@@ -341,7 +341,7 @@ bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *sr
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2)
+__bpf_kfunc bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2)
 {
 	return cpumask_subset(src1, src2);
 }
@@ -356,7 +356,7 @@ bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2)
  *
  * A struct bpf_cpumask pointer may be safely passed to @cpumask.
  */
-bool bpf_cpumask_empty(const struct cpumask *cpumask)
+__bpf_kfunc bool bpf_cpumask_empty(const struct cpumask *cpumask)
 {
 	return cpumask_empty(cpumask);
 }
@@ -371,7 +371,7 @@ bool bpf_cpumask_empty(const struct cpumask *cpumask)
  *
  * A struct bpf_cpumask pointer may be safely passed to @cpumask.
  */
-bool bpf_cpumask_full(const struct cpumask *cpumask)
+__bpf_kfunc bool bpf_cpumask_full(const struct cpumask *cpumask)
 {
 	return cpumask_full(cpumask);
 }
@@ -383,7 +383,7 @@ bool bpf_cpumask_full(const struct cpumask *cpumask)
  *
  * A struct bpf_cpumask pointer may be safely passed to @src.
  */
-void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src)
+__bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src)
 {
 	cpumask_copy((struct cpumask *)dst, src);
 }
@@ -398,7 +398,7 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src)
  *
  * A struct bpf_cpumask pointer may be safely passed to @src.
  */
-u32 bpf_cpumask_any(const struct cpumask *cpumask)
+__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask)
 {
 	return cpumask_any(cpumask);
 }
@@ -415,7 +415,7 @@ u32 bpf_cpumask_any(const struct cpumask *cpumask)
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2)
+__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2)
 {
 	return cpumask_any_and(src1, src2);
 }
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 458db2db2f81..2dae44581922 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1776,7 +1776,7 @@ __diag_push();
 __diag_ignore_all("-Wmissing-prototypes",
 		  "Global functions as their definitions will be in vmlinux BTF");
 
-void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
 {
 	struct btf_struct_meta *meta = meta__ign;
 	u64 size = local_type_id__k;
@@ -1790,7 +1790,7 @@ void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
 	return p;
 }
 
-void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
 {
 	struct btf_struct_meta *meta = meta__ign;
 	void *p = p__alloc;
@@ -1811,12 +1811,12 @@ static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *hea
 	tail ? list_add_tail(n, h) : list_add(n, h);
 }
 
-void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node)
+__bpf_kfunc void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node)
 {
 	return __bpf_list_add(node, head, false);
 }
 
-void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node)
+__bpf_kfunc void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node)
 {
 	return __bpf_list_add(node, head, true);
 }
@@ -1834,12 +1834,12 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
 	return (struct bpf_list_node *)n;
 }
 
-struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
+__bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
 {
 	return __bpf_list_del(head, false);
 }
 
-struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
+__bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
 {
 	return __bpf_list_del(head, true);
 }
@@ -1850,7 +1850,7 @@ struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
  * bpf_task_release().
  * @p: The task on which a reference is being acquired.
  */
-struct task_struct *bpf_task_acquire(struct task_struct *p)
+__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
 {
 	return get_task_struct(p);
 }
@@ -1861,7 +1861,7 @@ struct task_struct *bpf_task_acquire(struct task_struct *p)
  * released by calling bpf_task_release().
  * @p: The task on which a reference is being acquired.
  */
-struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
+__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
 {
 	/* For the time being this function returns NULL, as it's not currently
 	 * possible to safely acquire a reference to a task with RCU protection
@@ -1913,7 +1913,7 @@ struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
  * be released by calling bpf_task_release().
  * @pp: A pointer to a task kptr on which a reference is being acquired.
  */
-struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
+__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
 {
 	/* We must return NULL here until we have clarity on how to properly
 	 * leverage RCU for ensuring a task's lifetime. See the comment above
@@ -1926,7 +1926,7 @@ struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
  * bpf_task_release - Release the reference acquired on a task.
  * @p: The task on which a reference is being released.
  */
-void bpf_task_release(struct task_struct *p)
+__bpf_kfunc void bpf_task_release(struct task_struct *p)
 {
 	if (!p)
 		return;
@@ -1941,7 +1941,7 @@ void bpf_task_release(struct task_struct *p)
  * calling bpf_cgroup_release().
  * @cgrp: The cgroup on which a reference is being acquired.
  */
-struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
+__bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
 {
 	cgroup_get(cgrp);
 	return cgrp;
@@ -1953,7 +1953,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
  * be released by calling bpf_cgroup_release().
  * @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired.
  */
-struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
+__bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
 {
 	struct cgroup *cgrp;
 
@@ -1985,7 +1985,7 @@ struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
  * drops to 0.
  * @cgrp: The cgroup on which a reference is being released.
  */
-void bpf_cgroup_release(struct cgroup *cgrp)
+__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
 {
 	if (!cgrp)
 		return;
@@ -2000,7 +2000,7 @@ void bpf_cgroup_release(struct cgroup *cgrp)
  * @cgrp: The cgroup for which we're performing a lookup.
  * @level: The level of ancestor to look up.
  */
-struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
+__bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
 {
 	struct cgroup *ancestor;
 
@@ -2019,7 +2019,7 @@ struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
  * stored in a map, or released with bpf_task_release().
  * @pid: The pid of the task being looked up.
  */
-struct task_struct *bpf_task_from_pid(s32 pid)
+__bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
 {
 	struct task_struct *p;
 
@@ -2032,22 +2032,22 @@ struct task_struct *bpf_task_from_pid(s32 pid)
 	return p;
 }
 
-void *bpf_cast_to_kern_ctx(void *obj)
+__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
 {
 	return obj;
 }
 
-void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
+__bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
 {
 	return obj__ign;
 }
 
-void bpf_rcu_read_lock(void)
+__bpf_kfunc void bpf_rcu_read_lock(void)
 {
 	rcu_read_lock();
 }
 
-void bpf_rcu_read_unlock(void)
+__bpf_kfunc void bpf_rcu_read_unlock(void)
 {
 	rcu_read_unlock();
 }
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 793ecff29038..831f1f472bb8 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -26,7 +26,7 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
  * rstat_cpu->updated_children list.  See the comment on top of
  * cgroup_rstat_cpu definition for details.
  */
-void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
+__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
 {
 	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
 	unsigned long flags;
@@ -231,7 +231,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
  *
  * This function may block.
  */
-void cgroup_rstat_flush(struct cgroup *cgrp)
+__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
 {
 	might_sleep();
 
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 969e8f52f7da..b1cf259854ca 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -6,6 +6,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/btf.h>
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
@@ -975,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
 }
 STACK_FRAME_NON_STANDARD(__crash_kexec);
 
-void crash_kexec(struct pt_regs *regs)
+__bpf_kfunc void crash_kexec(struct pt_regs *regs)
 {
 	int old_cpu, this_cpu;
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b1eff2efd3b4..ff1458e541a8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1236,7 +1236,7 @@ __diag_ignore_all("-Wmissing-prototypes",
  * Return: a bpf_key pointer with a valid key pointer if the key is found, a
  *         NULL pointer otherwise.
  */
-struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
 {
 	key_ref_t key_ref;
 	struct bpf_key *bkey;
@@ -1285,7 +1285,7 @@ struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
  * Return: a bpf_key pointer with an invalid key pointer set from the
  *         pre-determined ID on success, a NULL pointer otherwise
  */
-struct bpf_key *bpf_lookup_system_key(u64 id)
+__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
 {
 	struct bpf_key *bkey;
 
@@ -1309,7 +1309,7 @@ struct bpf_key *bpf_lookup_system_key(u64 id)
  * Decrement the reference count of the key inside *bkey*, if the pointer
  * is valid, and free *bkey*.
  */
-void bpf_key_put(struct bpf_key *bkey)
+__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
 {
 	if (bkey->has_ref)
 		key_put(bkey->key);
@@ -1329,7 +1329,7 @@ void bpf_key_put(struct bpf_key *bkey)
  *
  * Return: 0 on success, a negative value on error.
  */
-int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
+__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
 			       struct bpf_dynptr_kern *sig_ptr,
 			       struct bpf_key *trusted_keyring)
 {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 7dbefa4fd2eb..af9827c4b351 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -484,7 +484,7 @@ out:
 __diag_push();
 __diag_ignore_all("-Wmissing-prototypes",
 		  "Global functions as their definitions will be in vmlinux BTF");
-int noinline bpf_fentry_test1(int a)
+__bpf_kfunc int bpf_fentry_test1(int a)
 {
 	return a + 1;
 }
@@ -529,23 +529,23 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
 	return (long)arg->a;
 }
 
-int noinline bpf_modify_return_test(int a, int *b)
+__bpf_kfunc int bpf_modify_return_test(int a, int *b)
 {
 	*b += 1;
 	return a + *b;
 }
 
-u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
 {
 	return a + b + c + d;
 }
 
-int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
 {
 	return a + b;
 }
 
-struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
+__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
 {
 	return sk;
 }
@@ -582,21 +582,21 @@ static struct prog_test_ref_kfunc prog_test_struct = {
 	.cnt = REFCOUNT_INIT(1),
 };
 
-noinline struct prog_test_ref_kfunc *
+__bpf_kfunc struct prog_test_ref_kfunc *
 bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
 {
 	refcount_inc(&prog_test_struct.cnt);
 	return &prog_test_struct;
 }
 
-noinline struct prog_test_member *
+__bpf_kfunc struct prog_test_member *
 bpf_kfunc_call_memb_acquire(void)
 {
 	WARN_ON_ONCE(1);
 	return NULL;
 }
 
-noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
 {
 	if (!p)
 		return;
@@ -604,11 +604,11 @@ noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
 	refcount_dec(&p->cnt);
 }
 
-noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
+__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
 {
 }
 
-noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
 {
 	WARN_ON_ONCE(1);
 }
@@ -621,12 +621,14 @@ static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const i
 	return (int *)p;
 }
 
-noinline int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
+						  const int rdwr_buf_size)
 {
 	return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
 }
 
-noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
+						    const int rdonly_buf_size)
 {
 	return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
 }
@@ -636,16 +638,17 @@ noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
  * Acquire functions must return struct pointers, so these ones are
  * failing.
  */
-noinline int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
+						    const int rdonly_buf_size)
 {
 	return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
 }
 
-noinline void bpf_kfunc_call_int_mem_release(int *p)
+__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
 {
 }
 
-noinline struct prog_test_ref_kfunc *
+__bpf_kfunc struct prog_test_ref_kfunc *
 bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
 {
 	struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
@@ -694,47 +697,47 @@ struct prog_test_fail3 {
 	char arr2[];
 };
 
-noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
 {
 }
 
-noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
 {
 }
 
-noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
 {
 }
 
-noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
 {
 }
 
-noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
 {
 }
 
-noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
 {
 }
 
-noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
 {
 }
 
-noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
 {
 }
 
-noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
 {
 }
 
-noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
 {
 }
 
-noinline void bpf_kfunc_call_test_destructive(void)
+__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
 {
 }
 
diff --git a/net/core/xdp.c b/net/core/xdp.c
index a5a7ecf6391c..787fb9f92b36 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
  */
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/filter.h>
 #include <linux/types.h>
@@ -722,7 +723,7 @@ __diag_ignore_all("-Wmissing-prototypes",
  *
  * Returns 0 on success or ``-errno`` on error.
  */
-int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+__bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
 {
 	return -EOPNOTSUPP;
 }
@@ -734,7 +735,7 @@ int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
  *
  * Returns 0 on success or ``-errno`` on error.
  */
-int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
+__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index d2c470524e58..146792cd26fe 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -295,7 +295,7 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
 }
 
 /* override sysctl_tcp_min_tso_segs */
-static u32 bbr_min_tso_segs(struct sock *sk)
+__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
 {
 	return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
 }
@@ -328,7 +328,7 @@ static void bbr_save_cwnd(struct sock *sk)
 		bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
 }
 
-static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
@@ -1023,7 +1023,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 	bbr_update_gains(sk);
 }
 
-static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs)
 {
 	struct bbr *bbr = inet_csk_ca(sk);
 	u32 bw;
@@ -1035,7 +1035,7 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
 	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
 }
 
-static void bbr_init(struct sock *sk)
+__bpf_kfunc static void bbr_init(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
@@ -1077,7 +1077,7 @@ static void bbr_init(struct sock *sk)
 	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
 }
 
-static u32 bbr_sndbuf_expand(struct sock *sk)
+__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk)
 {
 	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
 	return 3;
@@ -1086,7 +1086,7 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
 /* In theory BBR does not need to undo the cwnd since it does not
  * always reduce cwnd on losses (see bbr_main()). Keep it for now.
  */
-static u32 bbr_undo_cwnd(struct sock *sk)
+__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
 {
 	struct bbr *bbr = inet_csk_ca(sk);
 
@@ -1097,7 +1097,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
 }
 
 /* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
-static u32 bbr_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 bbr_ssthresh(struct sock *sk)
 {
 	bbr_save_cwnd(sk);
 	return tcp_sk(sk)->snd_ssthresh;
@@ -1125,7 +1125,7 @@ static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
 	return 0;
 }
 
-static void bbr_set_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state)
 {
 	struct bbr *bbr = inet_csk_ca(sk);
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d3cae40749e8..db8b4b488c31 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -403,7 +403,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
  * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
  * returns the leftover acks to adjust cwnd in congestion avoidance mode.
  */
-u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
+__bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
 {
 	u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
 
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
 /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
  * for every packet that was ACKed.
  */
-void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
+__bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
 {
 	/* If credits accumulated at a higher w, apply them gently now. */
 	if (tp->snd_cwnd_cnt >= w) {
@@ -443,7 +443,7 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
 /* This is Jacobson's slow start and congestion avoidance.
  * SIGCOMM '88, p. 328.
  */
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+__bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -462,7 +462,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
 /* Slow start threshold is half the congestion window (min 2) */
-u32 tcp_reno_ssthresh(struct sock *sk)
+__bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -470,7 +470,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
 
-u32 tcp_reno_undo_cwnd(struct sock *sk)
+__bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 768c10c1f649..0fd78ecb67e7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -126,7 +126,7 @@ static inline void bictcp_hystart_reset(struct sock *sk)
 	ca->sample_cnt = 0;
 }
 
-static void cubictcp_init(struct sock *sk)
+__bpf_kfunc static void cubictcp_init(struct sock *sk)
 {
 	struct bictcp *ca = inet_csk_ca(sk);
 
@@ -139,7 +139,7 @@ static void cubictcp_init(struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_TX_START) {
 		struct bictcp *ca = inet_csk_ca(sk);
@@ -321,7 +321,7 @@ tcp_friendliness:
 	ca->cnt = max(ca->cnt, 2U);
 }
 
-static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+__bpf_kfunc static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -338,7 +338,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 	tcp_cong_avoid_ai(tp, ca->cnt, acked);
 }
 
-static u32 cubictcp_recalc_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 cubictcp_recalc_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -355,7 +355,7 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
 	return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-static void cubictcp_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void cubictcp_state(struct sock *sk, u8 new_state)
 {
 	if (new_state == TCP_CA_Loss) {
 		bictcp_reset(inet_csk_ca(sk));
@@ -445,7 +445,7 @@ static void hystart_update(struct sock *sk, u32 delay)
 	}
 }
 
-static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
+__bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e0a2ca7456ff..bb23bb5b387a 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -75,7 +75,7 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
 	ca->old_delivered_ce = tp->delivered_ce;
 }
 
-static void dctcp_init(struct sock *sk)
+__bpf_kfunc static void dctcp_init(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -104,7 +104,7 @@ static void dctcp_init(struct sock *sk)
 	INET_ECN_dontxmit(sk);
 }
 
-static u32 dctcp_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 dctcp_ssthresh(struct sock *sk)
 {
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -113,7 +113,7 @@ static u32 dctcp_ssthresh(struct sock *sk)
 	return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
 }
 
-static void dctcp_update_alpha(struct sock *sk, u32 flags)
+__bpf_kfunc static void dctcp_update_alpha(struct sock *sk, u32 flags)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct dctcp *ca = inet_csk_ca(sk);
@@ -169,7 +169,7 @@ static void dctcp_react_to_loss(struct sock *sk)
 	tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
 }
 
-static void dctcp_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state)
 {
 	if (new_state == TCP_CA_Recovery &&
 	    new_state != inet_csk(sk)->icsk_ca_state)
@@ -179,7 +179,7 @@ static void dctcp_state(struct sock *sk, u8 new_state)
 	 */
 }
 
-static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+__bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 {
 	struct dctcp *ca = inet_csk_ca(sk);
 
@@ -229,7 +229,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
 	return 0;
 }
 
-static u32 dctcp_cwnd_undo(struct sock *sk)
+__bpf_kfunc static u32 dctcp_cwnd_undo(struct sock *sk)
 {
 	const struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 24002bc61e07..34913521c385 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -249,7 +249,7 @@ __diag_ignore_all("-Wmissing-prototypes",
  * @opts__sz	- Length of the bpf_ct_opts structure
  *		    Must be NF_BPF_CT_OPTS_SZ (12)
  */
-struct nf_conn___init *
+__bpf_kfunc struct nf_conn___init *
 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
 		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
 {
@@ -283,7 +283,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts__sz	- Length of the bpf_ct_opts structure
  *		    Must be NF_BPF_CT_OPTS_SZ (12)
  */
-struct nf_conn *
+__bpf_kfunc struct nf_conn *
 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
 		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
 {
@@ -316,7 +316,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts__sz	- Length of the bpf_ct_opts structure
  *		    Must be NF_BPF_CT_OPTS_SZ (12)
  */
-struct nf_conn___init *
+__bpf_kfunc struct nf_conn___init *
 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
 		 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
 {
@@ -351,7 +351,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts__sz	- Length of the bpf_ct_opts structure
  *		    Must be NF_BPF_CT_OPTS_SZ (12)
  */
-struct nf_conn *
+__bpf_kfunc struct nf_conn *
 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
 		  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
 {
@@ -376,7 +376,7 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @nfct	 - Pointer to referenced nf_conn___init object, obtained
  *		   using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
  */
-struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
+__bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
 {
 	struct nf_conn *nfct = (struct nf_conn *)nfct_i;
 	int err;
@@ -400,7 +400,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
  * @nf_conn	 - Pointer to referenced nf_conn object, obtained using
  *		   bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
  */
-void bpf_ct_release(struct nf_conn *nfct)
+__bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
 {
 	if (!nfct)
 		return;
@@ -417,7 +417,7 @@ void bpf_ct_release(struct nf_conn *nfct)
  *                 bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
  * @timeout      - Timeout in msecs.
  */
-void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
+__bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
 {
 	__nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
 }
@@ -432,7 +432,7 @@ void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
  *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
  * @timeout      - New timeout in msecs.
  */
-int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
+__bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
 {
 	return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
 }
@@ -447,7 +447,7 @@ int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
  *		   bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
  * @status       - New status value.
  */
-int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
+__bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
 {
 	return nf_ct_change_status_common((struct nf_conn *)nfct, status);
 }
@@ -462,7 +462,7 @@ int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
  *		   bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
  * @status       - New status value.
  */
-int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
+__bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
 {
 	return nf_ct_change_status_common(nfct, status);
 }
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 0fa5a0bbb0ff..141ee7783223 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -30,9 +30,9 @@ __diag_ignore_all("-Wmissing-prototypes",
  *		  interpreted as select a random port.
  * @manip	- NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
  */
-int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
-			union nf_inet_addr *addr, int port,
-			enum nf_nat_manip_type manip)
+__bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
+				    union nf_inet_addr *addr, int port,
+				    enum nf_nat_manip_type manip)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
 	u16 proto = nf_ct_l3num(ct);
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
index 1ef2162cebcf..d74f3fd20f2b 100644
--- a/net/xfrm/xfrm_interface_bpf.c
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -39,8 +39,7 @@ __diag_ignore_all("-Wmissing-prototypes",
  * @to		- Pointer to memory to which the metadata will be copied
  *		    Cannot be NULL
  */
-__used noinline
-int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
+__bpf_kfunc int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
 {
 	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
 	struct xfrm_md_info *info;
@@ -62,9 +61,7 @@ int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
  * @from	- Pointer to memory from which the metadata will be copied
  *		    Cannot be NULL
  */
-__used noinline
-int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
-			  const struct bpf_xfrm_info *from)
+__bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bpf_xfrm_info *from)
 {
 	struct sk_buff *skb = (struct sk_buff *)skb_ctx;
 	struct metadata_dst *md_dst;
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 5085fea3cac5..46500636d8cd 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -59,7 +59,7 @@ bpf_testmod_test_struct_arg_5(void) {
 	return bpf_testmod_test_struct_arg_result;
 }
 
-noinline void
+__bpf_kfunc void
 bpf_testmod_test_mod_kfunc(int i)
 {
 	*(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
-- 
cgit v1.2.3


From 6aed15e330bfec6a423f40582b2a8b53d9ce1757 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 1 Feb 2023 11:30:16 -0600
Subject: selftests/bpf: Add testcase for static kfunc with unused arg

kfuncs are allowed to be static, or not use one or more of their
arguments. For example, bpf_xdp_metadata_rx_hash() in net/core/xdp.c is
meant to be implemented by drivers, with the default implementation just
returning -EOPNOTSUPP. As described in [0], such kfuncs can have their
arguments elided, which can cause BTF encoding to be skipped. The new
__bpf_kfunc macro should address this, and this patch adds a selftest
which verifies that a static kfunc with at least one unused argument can
still be encoded and invoked by a BPF program.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230201173016.342758-5-void@manifault.com
---
 net/bpf/test_run.c                                  |  6 ++++++
 tools/testing/selftests/bpf/prog_tests/kfunc_call.c |  1 +
 tools/testing/selftests/bpf/progs/kfunc_call_test.c | 11 +++++++++++
 3 files changed, 18 insertions(+)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index af9827c4b351..e6f773d12045 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -741,6 +741,11 @@ __bpf_kfunc void bpf_kfunc_call_test_destructive(void)
 {
 }
 
+__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
+{
+	return arg;
+}
+
 __diag_pop();
 
 BTF_SET8_START(bpf_test_modify_return_ids)
@@ -779,6 +784,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
 BTF_SET8_END(test_sk_check_kfunc_ids)
 
 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index bb4cd82a788a..a543742cd7bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -77,6 +77,7 @@ static struct kfunc_test_params kfunc_tests[] = {
 	TC_TEST(kfunc_call_test_get_mem, 42),
 	SYSCALL_TEST(kfunc_syscall_test, 0),
 	SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0),
+	TC_TEST(kfunc_call_test_static_unused_arg, 0),
 };
 
 struct syscall_test_args {
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index d91c58d06d38..7daa8f5720b9 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -17,6 +17,7 @@ extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
 extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
 extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
 extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym;
 
 SEC("tc")
 int kfunc_call_test4(struct __sk_buff *skb)
@@ -181,4 +182,14 @@ int kfunc_call_test_get_mem(struct __sk_buff *skb)
 	return ret;
 }
 
+SEC("tc")
+int kfunc_call_test_static_unused_arg(struct __sk_buff *skb)
+{
+
+	u32 expected = 5, actual;
+
+	actual = bpf_kfunc_call_test_static_unused_arg(expected, 0xdeadbeef);
+	return actual != expected ? -1 : 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From f2922f77a6a6e6c549f1fd639ec01d2f7999fbc0 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Feb 2023 19:12:54 +0100
Subject: selftests/bpf: Fix unmap bug in prog_tests/xdp_metadata.c

The function close_xsk() unmap via munmap() the wrong memory pointer.
The call xsk_umem__delete(xsk->umem) have already freed xsk->umem.
Thus the call to munmap(xsk->umem, UMEM_SIZE) will have unpredictable
behavior that can lead to Segmentation fault elsewhere, as man page
explain subsequent references to these pages will generate SIGSEGV.

Fixes: e2a46d54d7a1 ("selftests/bpf: Verify xdp_metadata xdp->af_xdp path")
Reported-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/167527517464.938135.13750760520577765269.stgit@firesoul
---
 tools/testing/selftests/bpf/prog_tests/xdp_metadata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index e033d48288c0..241909d71c7e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -121,7 +121,7 @@ static void close_xsk(struct xsk *xsk)
 		xsk_umem__delete(xsk->umem);
 	if (xsk->socket)
 		xsk_socket__delete(xsk->socket);
-	munmap(xsk->umem, UMEM_SIZE);
+	munmap(xsk->umem_area, UMEM_SIZE);
 }
 
 static void ip_csum(struct iphdr *iph)
-- 
cgit v1.2.3


From 3fd9dcd689a5582e682fde57b3139066d032367a Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Feb 2023 18:31:50 +0100
Subject: selftests/bpf: xdp_hw_metadata clear metadata when -EOPNOTSUPP

The AF_XDP userspace part of xdp_hw_metadata see non-zero as a signal of
the availability of rx_timestamp and rx_hash in data_meta area. The
kernel-side BPF-prog code doesn't initialize these members when kernel
returns an error e.g. -EOPNOTSUPP.  This memory area is not guaranteed to
be zeroed, and can contain garbage/previous values, which will be read
and interpreted by AF_XDP userspace side.

Tested this on different drivers. The experiences are that for most
packets they will have zeroed this data_meta area, but occasionally it
will contain garbage data.

Example of failure tested on ixgbe:

 poll: 1 (0)
 xsk_ring_cons__peek: 1
 0x18ec788: rx_desc[0]->addr=100000000008000 addr=8100 comp_addr=8000
 rx_hash: 3697961069
 rx_timestamp:  9024981991734834796 (sec:9024981991.7348)
 0x18ec788: complete idx=8 addr=8000

Converting to date:

 date -d @9024981991
 2255-12-28T20:26:31 CET

I choose a simple fix in this patch. When kfunc fails or isn't supported
assign zero to the corresponding struct meta value.

It's up to the individual BPF-programmer to do something smarter e.g.
that fits their use-case, like getting a software timestamp and marking
a flag that gives the type of timestamp.

Fixes: 297a3f124155 ("selftests/bpf: Simple program to dump XDP RX metadata")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/167527271027.937063.5177725618616476592.stgit@firesoul
---
 tools/testing/selftests/bpf/progs/xdp_hw_metadata.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index 25b8178735ee..4c55b4d79d3d 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -70,10 +70,14 @@ int rx(struct xdp_md *ctx)
 	}
 
 	if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
-		bpf_printk("populated rx_timestamp with %u", meta->rx_timestamp);
+		bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp);
+	else
+		meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
 
 	if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
 		bpf_printk("populated rx_hash with %u", meta->rx_hash);
+	else
+		meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */
 
 	return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
 }
-- 
cgit v1.2.3


From a19a62e56478ba4afadfa7df94d0819542b7ccf8 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Feb 2023 18:31:55 +0100
Subject: selftests/bpf: xdp_hw_metadata cleanup cause segfault

Using xdp_hw_metadata I experince Segmentation fault after seeing
"detaching bpf program....".

On my system the segfault happened when accessing bpf_obj->skeleton
in xdp_hw_metadata__destroy(bpf_obj) call. That doesn't make any sense
as this memory have not been freed by program at this point in time.

Prior to calling xdp_hw_metadata__destroy(bpf_obj) the function
close_xsk() is called for each RX-queue xsk.  The real bug lays
in close_xsk() that unmap via munmap() the wrong memory pointer.
The call xsk_umem__delete(xsk->umem) will free xsk->umem, thus
the call to munmap(xsk->umem, UMEM_SIZE) will have unpredictable
behavior. And man page explain subsequent references to these
pages will generate SIGSEGV.

Unmapping xsk->umem_area instead removes the segfault.

Fixes: 297a3f124155 ("selftests/bpf: Simple program to dump XDP RX metadata")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/167527271533.937063.5717065138099679142.stgit@firesoul
---
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 3823b1c499cc..438083e34cce 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -121,7 +121,7 @@ static void close_xsk(struct xsk *xsk)
 		xsk_umem__delete(xsk->umem);
 	if (xsk->socket)
 		xsk_socket__delete(xsk->socket);
-	munmap(xsk->umem, UMEM_SIZE);
+	munmap(xsk->umem_area, UMEM_SIZE);
 }
 
 static void refill_rx(struct xsk *xsk, __u64 addr)
-- 
cgit v1.2.3


From 7bd4224deecd2d917fcbb52f9d13ab1453be219a Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Feb 2023 18:32:00 +0100
Subject: selftests/bpf: xdp_hw_metadata correct status value in error(3)

The glibc error reporting function error():

 void error(int status, int errnum, const char *format, ...);

The status argument should be a positive value between 0-255 as it
is passed over to the exit(3) function as the value as the shell exit
status. The least significant byte of status (i.e., status & 0xFF) is
returned to the shell parent.

Fix this by using 1 instead of -1. As 1 corresponds to C standard
constant EXIT_FAILURE.

Fixes: 297a3f124155 ("selftests/bpf: Simple program to dump XDP RX metadata")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/167527272038.937063.9137108142012298120.stgit@firesoul
---
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 28 +++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 438083e34cce..58fde35abad7 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -165,7 +165,7 @@ static void verify_skb_metadata(int fd)
 	hdr.msg_controllen = sizeof(cmsg_buf);
 
 	if (recvmsg(fd, &hdr, 0) < 0)
-		error(-1, errno, "recvmsg");
+		error(1, errno, "recvmsg");
 
 	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
 	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
@@ -275,11 +275,11 @@ static int rxq_num(const char *ifname)
 
 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (fd < 0)
-		error(-1, errno, "socket");
+		error(1, errno, "socket");
 
 	ret = ioctl(fd, SIOCETHTOOL, &ifr);
 	if (ret < 0)
-		error(-1, errno, "ioctl(SIOCETHTOOL)");
+		error(1, errno, "ioctl(SIOCETHTOOL)");
 
 	close(fd);
 
@@ -296,11 +296,11 @@ static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *c
 
 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 	if (fd < 0)
-		error(-1, errno, "socket");
+		error(1, errno, "socket");
 
 	ret = ioctl(fd, op, &ifr);
 	if (ret < 0)
-		error(-1, errno, "ioctl(%d)", op);
+		error(1, errno, "ioctl(%d)", op);
 
 	close(fd);
 }
@@ -360,7 +360,7 @@ static void timestamping_enable(int fd, int val)
 
 	ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
 	if (ret < 0)
-		error(-1, errno, "setsockopt(SO_TIMESTAMPING)");
+		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
 }
 
 int main(int argc, char *argv[])
@@ -386,13 +386,13 @@ int main(int argc, char *argv[])
 
 	rx_xsk = malloc(sizeof(struct xsk) * rxq);
 	if (!rx_xsk)
-		error(-1, ENOMEM, "malloc");
+		error(1, ENOMEM, "malloc");
 
 	for (i = 0; i < rxq; i++) {
 		printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
 		ret = open_xsk(ifindex, &rx_xsk[i], i);
 		if (ret)
-			error(-1, -ret, "open_xsk");
+			error(1, -ret, "open_xsk");
 
 		printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
 	}
@@ -400,7 +400,7 @@ int main(int argc, char *argv[])
 	printf("open bpf program...\n");
 	bpf_obj = xdp_hw_metadata__open();
 	if (libbpf_get_error(bpf_obj))
-		error(-1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
+		error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
 
 	prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
 	bpf_program__set_ifindex(prog, ifindex);
@@ -409,12 +409,12 @@ int main(int argc, char *argv[])
 	printf("load bpf program...\n");
 	ret = xdp_hw_metadata__load(bpf_obj);
 	if (ret)
-		error(-1, -ret, "xdp_hw_metadata__load");
+		error(1, -ret, "xdp_hw_metadata__load");
 
 	printf("prepare skb endpoint...\n");
 	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
 	if (server_fd < 0)
-		error(-1, errno, "start_server");
+		error(1, errno, "start_server");
 	timestamping_enable(server_fd,
 			    SOF_TIMESTAMPING_SOFTWARE |
 			    SOF_TIMESTAMPING_RAW_HARDWARE);
@@ -427,7 +427,7 @@ int main(int argc, char *argv[])
 		printf("map[%d] = %d\n", queue_id, sock_fd);
 		ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
 		if (ret)
-			error(-1, -ret, "bpf_map_update_elem");
+			error(1, -ret, "bpf_map_update_elem");
 	}
 
 	printf("attach bpf program...\n");
@@ -435,12 +435,12 @@ int main(int argc, char *argv[])
 			     bpf_program__fd(bpf_obj->progs.rx),
 			     XDP_FLAGS, NULL);
 	if (ret)
-		error(-1, -ret, "bpf_xdp_attach");
+		error(1, -ret, "bpf_xdp_attach");
 
 	signal(SIGINT, handle_signal);
 	ret = verify_metadata(rx_xsk, rxq, server_fd);
 	close(server_fd);
 	cleanup();
 	if (ret)
-		error(-1, -ret, "verify_metadata");
+		error(1, -ret, "verify_metadata");
 }
-- 
cgit v1.2.3


From e8a3c8bd687068bafb640ca524905f0bec716a13 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Feb 2023 18:32:05 +0100
Subject: selftests/bpf: xdp_hw_metadata use strncpy for ifname

The ifname char pointer is taken directly from the command line
as input and the string is copied directly into struct ifreq
via strcpy. This makes it easy to corrupt other members of ifreq
and generally do stack overflows.

Most often the ioctl will fail with:

 ./xdp_hw_metadata: ioctl(SIOCETHTOOL): Bad address

As people will likely copy-paste code for getting NIC queue
channels (rxq_num) and enabling HW timestamping (hwtstamp_ioctl)
lets make this code a bit more secure by using strncpy.

Fixes: 297a3f124155 ("selftests/bpf: Simple program to dump XDP RX metadata")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/167527272543.937063.16993147790832546209.stgit@firesoul
---
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 58fde35abad7..2a66bd3f2c9f 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -270,7 +270,7 @@ static int rxq_num(const char *ifname)
 	struct ifreq ifr = {
 		.ifr_data = (void *)&ch,
 	};
-	strcpy(ifr.ifr_name, ifname);
+	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
 	int fd, ret;
 
 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
@@ -291,7 +291,7 @@ static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *c
 	struct ifreq ifr = {
 		.ifr_data = (void *)cfg,
 	};
-	strcpy(ifr.ifr_name, ifname);
+	strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
 	int fd, ret;
 
 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
-- 
cgit v1.2.3


From 8b79b34a66cd61769802255a2a6bdd446d4f93ca Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 1 Feb 2023 15:36:40 -0800
Subject: selftests/bpf: Don't refill on completion in xdp_metadata

We only need to consume TX completion instead of refilling 'fill' ring.
It's currently not an issue because we never RX more than 8 packets.

Fixes: e2a46d54d7a1 ("selftests/bpf: Verify xdp_metadata xdp->af_xdp path")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230201233640.367646-1-sdf@google.com
---
 tools/testing/selftests/bpf/prog_tests/xdp_metadata.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 241909d71c7e..aa4beae99f4f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -205,9 +205,8 @@ static void complete_tx(struct xsk *xsk)
 	if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
 		addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
 
-		printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
-		*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
-		xsk_ring_prod__submit(&xsk->fill, 1);
+		printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
+		xsk_ring_cons__release(&xsk->comp, 1);
 	}
 }
 
-- 
cgit v1.2.3


From 4bc32df7a9c387ea1a1b7336e2c7d44aa8cee9f4 Mon Sep 17 00:00:00 2001
From: Ye Xingchen <ye.xingchen@zte.com.cn>
Date: Tue, 31 Jan 2023 14:40:51 +0800
Subject: selftests/bpf: Remove duplicate include header in xdp_hw_metadata

The linux/net_tstamp.h is included more than once, thus clean it up.

Signed-off-by: Ye Xingchen <ye.xingchen@zte.com.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/202301311440516312161@zte.com.cn
---
 tools/testing/selftests/bpf/xdp_hw_metadata.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 2a66bd3f2c9f..1c8acb68b977 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -24,7 +24,6 @@
 #include <linux/net_tstamp.h>
 #include <linux/udp.h>
 #include <linux/sockios.h>
-#include <linux/net_tstamp.h>
 #include <sys/mman.h>
 #include <net/if.h>
 #include <poll.h>
-- 
cgit v1.2.3


From 354bb4a0e0b6be8f55bacbe7f08c94b4741f5658 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 3 Feb 2023 00:53:35 +0100
Subject: selftests/bpf: Initialize tc in xdp_synproxy

xdp_synproxy/xdp fails in CI with:

    Error: bpf_tc_hook_create: File exists

The XDP version of the test should not be calling bpf_tc_hook_create();
the reason it's happening anyway is that if we don't specify --tc on the
command line, tc variable remains uninitialized.

Fixes: 784d5dc0efc2 ("selftests/bpf: Add selftests for raw syncookie helpers in TC mode")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Reported-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/r/20230202235335.3403781-1-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/xdp_synproxy.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
index 410a1385a01d..6dbe0b745198 100644
--- a/tools/testing/selftests/bpf/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -116,6 +116,7 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *
 	*tcpipopts = 0;
 	*ports = NULL;
 	*single = false;
+	*tc = false;
 
 	while (true) {
 		int opt;
-- 
cgit v1.2.3


From 150809082aab95f7078f343c1c82770d6c9ebe68 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Thu, 2 Feb 2023 14:31:26 +0800
Subject: selftests/bpf: Use semicolon instead of comma in test_verifier.c

Just silence the following checkpatch warning:

WARNING: Possible comma where semicolon could be used

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Link: https://lore.kernel.org/r/1675319486-27744-3-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 8c808551dfd7..887c49dc5abd 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -209,7 +209,7 @@ loop:
 		insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
 		insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-					 BPF_FUNC_skb_vlan_push),
+					 BPF_FUNC_skb_vlan_push);
 		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
 		i++;
 	}
@@ -220,7 +220,7 @@ loop:
 		i++;
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-					 BPF_FUNC_skb_vlan_pop),
+					 BPF_FUNC_skb_vlan_pop);
 		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
 		i++;
 	}
-- 
cgit v1.2.3


From 84050074e51b68e6f1d3e89ebe8f8f6d73472ec3 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 1 Feb 2023 11:24:23 +0100
Subject: selftests/bpf: add test for bpf_xdp_query xdp-features support

Introduce a self-test to verify libbpf bpf_xdp_query capability to dump
the xdp-features supported by the device (lo and veth in this case).

Acked-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/534550318a2c883e174811683909544c63632f05.1675245258.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/xdp_do_redirect.c     | 27 +++++++++++++++++++++-
 tools/testing/selftests/bpf/prog_tests/xdp_info.c  |  8 +++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index ac70e871d62f..2666c84dbd01 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -4,10 +4,12 @@
 #include <net/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
+#include <linux/if_link.h>
 #include <linux/ipv6.h>
 #include <linux/in6.h>
 #include <linux/udp.h>
 #include <bpf/bpf_endian.h>
+#include <uapi/linux/netdev.h>
 #include "test_xdp_do_redirect.skel.h"
 
 #define SYS(fmt, ...)						\
@@ -96,7 +98,7 @@ void test_xdp_do_redirect(void)
 	struct test_xdp_do_redirect *skel = NULL;
 	struct nstoken *nstoken = NULL;
 	struct bpf_link *link;
-
+	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
 	struct xdp_md ctx_in = { .data = sizeof(__u32),
 				 .data_end = sizeof(data) };
 	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
@@ -157,6 +159,29 @@ void test_xdp_do_redirect(void)
 	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
 		goto out;
 
+	/* Check xdp features supported by veth driver */
+	err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
+	if (!ASSERT_OK(err, "veth_src bpf_xdp_query"))
+		goto out;
+
+	if (!ASSERT_EQ(query_opts.feature_flags,
+		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+		       NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+		       NETDEV_XDP_ACT_NDO_XMIT_SG,
+		       "veth_src query_opts.feature_flags"))
+		goto out;
+
+	err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
+	if (!ASSERT_OK(err, "veth_dst bpf_xdp_query"))
+		goto out;
+
+	if (!ASSERT_EQ(query_opts.feature_flags,
+		       NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+		       NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+		       NETDEV_XDP_ACT_NDO_XMIT_SG,
+		       "veth_dst query_opts.feature_flags"))
+		goto out;
+
 	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
 	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
 	skel->rodata->ifindex_in = ifindex_src;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index cd3aa340e65e..286c21ecdc65 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -8,6 +8,7 @@ void serial_test_xdp_info(void)
 {
 	__u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id;
 	const char *file = "./xdp_dummy.bpf.o";
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
 	struct bpf_prog_info info = {};
 	struct bpf_object *obj;
 	int err, prog_fd;
@@ -61,6 +62,13 @@ void serial_test_xdp_info(void)
 	if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
 		goto out;
 
+	/* Check xdp features supported by lo device */
+	opts.feature_flags = ~0;
+	err = bpf_xdp_query(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &opts);
+	if (!ASSERT_OK(err, "bpf_xdp_query"))
+		goto out;
+
+	ASSERT_EQ(opts.feature_flags, 0, "opts.feature_flags");
 out:
 	bpf_xdp_detach(IFINDEX_LO, 0, NULL);
 out_close:
-- 
cgit v1.2.3


From 4dba3e7852b7717a20ba206ab23ad289a0a5c504 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 1 Feb 2023 11:24:24 +0100
Subject: selftests/bpf: introduce XDP compliance test tool

Introduce xdp_features tool in order to test XDP features supported by
the NIC and match them against advertised ones.
In order to test supported/advertised XDP features, xdp_features must
run on the Device Under Test (DUT) and on a Tester device.
xdp_features opens a control TCP channel between DUT and Tester devices
to send control commands from Tester to the DUT and a UDP data channel
where the Tester sends UDP 'echo' packets and the DUT is expected to
reply back with the same packet. DUT installs multiple XDP programs on the
NIC to test XDP capabilities and reports back to the Tester some XDP stats.
Currently xdp_features supports the following XDP features:
- XDP_DROP
- XDP_ABORTED
- XDP_PASS
- XDP_TX
- XDP_REDIRECT
- XDP_NDO_XMIT

Co-developed-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/7c1af8e7e6ef0614cf32fa9e6bdaa2d8d605f859.1675245258.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore           |   1 +
 tools/testing/selftests/bpf/Makefile             |  11 +-
 tools/testing/selftests/bpf/progs/xdp_features.c | 269 +++++++++
 tools/testing/selftests/bpf/test_xdp_features.sh | 107 ++++
 tools/testing/selftests/bpf/xdp_features.c       | 699 +++++++++++++++++++++++
 tools/testing/selftests/bpf/xdp_features.h       |  20 +
 6 files changed, 1105 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_features.c
 create mode 100755 tools/testing/selftests/bpf/test_xdp_features.sh
 create mode 100644 tools/testing/selftests/bpf/xdp_features.c
 create mode 100644 tools/testing/selftests/bpf/xdp_features.h

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 4aa5bba956ff..116fecf80ca1 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -48,3 +48,4 @@ xskxceiver
 xdp_redirect_multi
 xdp_synproxy
 xdp_hw_metadata
+xdp_features
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e79039726173..b2eb3201b85a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -73,7 +73,8 @@ TEST_PROGS := test_kmod.sh \
 	test_bpftool.sh \
 	test_bpftool_metadata.sh \
 	test_doc_build.sh \
-	test_xsk.sh
+	test_xsk.sh \
+	test_xdp_features.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
@@ -83,7 +84,8 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
 	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-	xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata
+	xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
+	xdp_features
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file
 TEST_GEN_FILES += liburandom_read.so
@@ -385,6 +387,7 @@ test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton
 test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
 xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
 xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
+xdp_features.skel.h-deps := xdp_features.bpf.o
 
 LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
@@ -587,6 +590,10 @@ $(OUTPUT)/xdp_hw_metadata: xdp_hw_metadata.c $(OUTPUT)/network_helpers.o $(OUTPU
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
+$(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp_features.skel.h | $(OUTPUT)
+	$(call msg,BINARY,,$@)
+	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
 # Make sure we are able to include and link libbpf against c++.
 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
diff --git a/tools/testing/selftests/bpf/progs/xdp_features.c b/tools/testing/selftests/bpf/progs/xdp_features.c
new file mode 100644
index 000000000000..87c247d56f72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_features.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/netdev.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <asm-generic/errno-base.h>
+
+#include "xdp_features.h"
+
+#define ipv6_addr_equal(a, b)	((a).s6_addr32[0] == (b).s6_addr32[0] &&	\
+				 (a).s6_addr32[1] == (b).s6_addr32[1] &&	\
+				 (a).s6_addr32[2] == (b).s6_addr32[2] &&	\
+				 (a).s6_addr32[3] == (b).s6_addr32[3])
+
+struct net_device;
+struct bpf_prog;
+
+struct xdp_cpumap_stats {
+	unsigned int redirect;
+	unsigned int pass;
+	unsigned int drop;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 1);
+} stats SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 1);
+} dut_stats SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CPUMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_cpumap_val));
+	__uint(max_entries, 1);
+} cpu_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_devmap_val));
+	__uint(max_entries, 1);
+} dev_map SEC(".maps");
+
+const volatile struct in6_addr tester_addr;
+const volatile struct in6_addr dut_addr;
+
+static __always_inline int
+xdp_process_echo_packet(struct xdp_md *xdp, bool dut)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eh = data;
+	struct tlv_hdr *tlv;
+	struct udphdr *uh;
+	__be16 port;
+	__u8 *cmd;
+
+	if (eh + 1 > (struct ethhdr *)data_end)
+		return -EINVAL;
+
+	if (eh->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *ih = (struct iphdr *)(eh + 1);
+		__be32 saddr = dut ? tester_addr.s6_addr32[3]
+				   : dut_addr.s6_addr32[3];
+		__be32 daddr = dut ? dut_addr.s6_addr32[3]
+				   : tester_addr.s6_addr32[3];
+
+		ih = (struct iphdr *)(eh + 1);
+		if (ih + 1 > (struct iphdr *)data_end)
+			return -EINVAL;
+
+		if (saddr != ih->saddr)
+			return -EINVAL;
+
+		if (daddr != ih->daddr)
+			return -EINVAL;
+
+		if (ih->protocol != IPPROTO_UDP)
+			return -EINVAL;
+
+		uh = (struct udphdr *)(ih + 1);
+	} else if (eh->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct in6_addr saddr = dut ? tester_addr : dut_addr;
+		struct in6_addr daddr = dut ? dut_addr : tester_addr;
+		struct ipv6hdr *ih6 = (struct ipv6hdr *)(eh + 1);
+
+		if (ih6 + 1 > (struct ipv6hdr *)data_end)
+			return -EINVAL;
+
+		if (!ipv6_addr_equal(saddr, ih6->saddr))
+			return -EINVAL;
+
+		if (!ipv6_addr_equal(daddr, ih6->daddr))
+			return -EINVAL;
+
+		if (ih6->nexthdr != IPPROTO_UDP)
+			return -EINVAL;
+
+		uh = (struct udphdr *)(ih6 + 1);
+	} else {
+		return -EINVAL;
+	}
+
+	if (uh + 1 > (struct udphdr *)data_end)
+		return -EINVAL;
+
+	port = dut ? uh->dest : uh->source;
+	if (port != bpf_htons(DUT_ECHO_PORT))
+		return -EINVAL;
+
+	tlv = (struct tlv_hdr *)(uh + 1);
+	if (tlv + 1 > data_end)
+		return -EINVAL;
+
+	return bpf_htons(tlv->type) == CMD_ECHO ? 0 : -EINVAL;
+}
+
+static __always_inline int
+xdp_update_stats(struct xdp_md *xdp, bool tx, bool dut)
+{
+	__u32 *val, key = 0;
+
+	if (xdp_process_echo_packet(xdp, tx))
+		return -EINVAL;
+
+	if (dut)
+		val = bpf_map_lookup_elem(&dut_stats, &key);
+	else
+		val = bpf_map_lookup_elem(&stats, &key);
+
+	if (val)
+		__sync_add_and_fetch(val, 1);
+
+	return 0;
+}
+
+/* Tester */
+
+SEC("xdp")
+int xdp_tester_check_tx(struct xdp_md *xdp)
+{
+	xdp_update_stats(xdp, true, false);
+
+	return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_tester_check_rx(struct xdp_md *xdp)
+{
+	xdp_update_stats(xdp, false, false);
+
+	return XDP_PASS;
+}
+
+/* DUT */
+
+SEC("xdp")
+int xdp_do_pass(struct xdp_md *xdp)
+{
+	xdp_update_stats(xdp, true, true);
+
+	return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_do_drop(struct xdp_md *xdp)
+{
+	if (xdp_update_stats(xdp, true, true))
+		return XDP_PASS;
+
+	return XDP_DROP;
+}
+
+SEC("xdp")
+int xdp_do_aborted(struct xdp_md *xdp)
+{
+	if (xdp_process_echo_packet(xdp, true))
+		return XDP_PASS;
+
+	return XDP_ABORTED;
+}
+
+SEC("xdp")
+int xdp_do_tx(struct xdp_md *xdp)
+{
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eh = data;
+	__u8 tmp_mac[ETH_ALEN];
+
+	if (xdp_update_stats(xdp, true, true))
+		return XDP_PASS;
+
+	__builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN);
+	__builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN);
+	__builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN);
+
+	return XDP_TX;
+}
+
+SEC("xdp")
+int xdp_do_redirect(struct xdp_md *xdp)
+{
+	if (xdp_process_echo_packet(xdp, true))
+		return XDP_PASS;
+
+	return bpf_redirect_map(&cpu_map, 0, 0);
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(xdp_exception, const struct net_device *dev,
+	     const struct bpf_prog *xdp, __u32 act)
+{
+	__u32 *val, key = 0;
+
+	val = bpf_map_lookup_elem(&dut_stats, &key);
+	if (val)
+		__sync_add_and_fetch(val, 1);
+
+	return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+	     unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+	__u32 *val, key = 0;
+
+	val = bpf_map_lookup_elem(&dut_stats, &key);
+	if (val)
+		__sync_add_and_fetch(val, 1);
+
+	return 0;
+}
+
+SEC("xdp/cpumap")
+int xdp_do_redirect_cpumap(struct xdp_md *xdp)
+{
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eh = data;
+	__u8 tmp_mac[ETH_ALEN];
+
+	if (xdp_process_echo_packet(xdp, true))
+		return XDP_PASS;
+
+	__builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN);
+	__builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN);
+	__builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN);
+
+	return bpf_redirect_map(&dev_map, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_features.sh b/tools/testing/selftests/bpf/test_xdp_features.sh
new file mode 100755
index 000000000000..0aa71c4455c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_features.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NS="ns1-$(mktemp -u XXXXXX)"
+readonly V0_IP4=10.10.0.11
+readonly V1_IP4=10.10.0.1
+readonly V0_IP6=2001:db8::11
+readonly V1_IP6=2001:db8::1
+
+ret=1
+
+setup() {
+	{
+		ip netns add ${NS}
+
+		ip link add v1 type veth peer name v0 netns ${NS}
+
+		ip link set v1 up
+		ip addr add $V1_IP4/24 dev v1
+		ip addr add $V1_IP6/64 nodad dev v1
+		ip -n ${NS} link set dev v0 up
+		ip -n ${NS} addr add $V0_IP4/24 dev v0
+		ip -n ${NS} addr add $V0_IP6/64 nodad dev v0
+
+		# Enable XDP mode and disable checksum offload
+		ethtool -K v1 gro on
+		ethtool -K v1 tx-checksumming off
+		ip netns exec ${NS} ethtool -K v0 gro on
+		ip netns exec ${NS} ethtool -K v0 tx-checksumming off
+	} > /dev/null 2>&1
+}
+
+cleanup() {
+	ip link del v1 2> /dev/null
+	ip netns del ${NS} 2> /dev/null
+	[ "$(pidof xdp_features)" = "" ] || kill $(pidof xdp_features) 2> /dev/null
+}
+
+wait_for_dut_server() {
+	while sleep 1; do
+		ss -tlp | grep -q xdp_features
+		[ $? -eq 0 ] && break
+	done
+}
+
+test_xdp_features() {
+	setup
+
+	## XDP_PASS
+	./xdp_features -f XDP_PASS -D $V1_IP6 -T $V0_IP6 v1 &
+	wait_for_dut_server
+	ip netns exec ${NS} ./xdp_features -t -f XDP_PASS \
+					   -D $V1_IP6 -C $V1_IP6 \
+					   -T $V0_IP6 v0
+	[ $? -ne 0 ] && exit
+
+	## XDP_DROP
+	./xdp_features -f XDP_DROP -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+	wait_for_dut_server
+	ip netns exec ${NS} ./xdp_features -t -f XDP_DROP \
+					   -D ::ffff:$V1_IP4 \
+					   -C ::ffff:$V1_IP4 \
+					   -T ::ffff:$V0_IP4 v0
+	[ $? -ne 0 ] && exit
+
+	## XDP_ABORTED
+	./xdp_features -f XDP_ABORTED -D $V1_IP6 -T $V0_IP6 v1 &
+	wait_for_dut_server
+	ip netns exec ${NS} ./xdp_features -t -f XDP_ABORTED \
+					   -D $V1_IP6 -C $V1_IP6 \
+					   -T $V0_IP6 v0
+	[ $? -ne 0 ] && exit
+
+	## XDP_TX
+	./xdp_features -f XDP_TX -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+	wait_for_dut_server
+	ip netns exec ${NS} ./xdp_features -t -f XDP_TX \
+					   -D ::ffff:$V1_IP4 \
+					   -C ::ffff:$V1_IP4 \
+					   -T ::ffff:$V0_IP4 v0
+	[ $? -ne 0 ] && exit
+
+	## XDP_REDIRECT
+	./xdp_features -f XDP_REDIRECT -D $V1_IP6 -T $V0_IP6 v1 &
+	wait_for_dut_server
+	ip netns exec ${NS} ./xdp_features -t -f XDP_REDIRECT \
+					   -D $V1_IP6 -C $V1_IP6 \
+					   -T $V0_IP6 v0
+	[ $? -ne 0 ] && exit
+
+	## XDP_NDO_XMIT
+	./xdp_features -f XDP_NDO_XMIT -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+	wait_for_dut_server
+	ip netns exec ${NS} ./xdp_features -t -f XDP_NDO_XMIT \
+					   -D ::ffff:$V1_IP4 \
+					   -C ::ffff:$V1_IP4 \
+					   -T ::ffff:$V0_IP4 v0
+	ret=$?
+	cleanup
+}
+
+set -e
+trap cleanup 2 3 6 9
+
+test_xdp_features
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
new file mode 100644
index 000000000000..10fad1243573
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/netdev.h>
+#include <linux/if_link.h>
+#include <signal.h>
+#include <argp.h>
+#include <net/if.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <pthread.h>
+
+#include <network_helpers.h>
+
+#include "xdp_features.skel.h"
+#include "xdp_features.h"
+
+#define RED(str)	"\033[0;31m" str "\033[0m"
+#define GREEN(str)	"\033[0;32m" str "\033[0m"
+#define YELLOW(str)	"\033[0;33m" str "\033[0m"
+
+static struct env {
+	bool verbosity;
+	int ifindex;
+	bool is_tester;
+	struct {
+		enum netdev_xdp_act drv_feature;
+		enum xdp_action action;
+	} feature;
+	struct sockaddr_storage dut_ctrl_addr;
+	struct sockaddr_storage dut_addr;
+	struct sockaddr_storage tester_addr;
+} env;
+
+#define BUFSIZE		128
+
+void test__fail(void) { /* for network_helpers.c */ }
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+			   const char *format, va_list args)
+{
+	if (level == LIBBPF_DEBUG && !env.verbosity)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static volatile bool exiting;
+
+static void sig_handler(int sig)
+{
+	exiting = true;
+}
+
+const char *argp_program_version = "xdp-features 0.0";
+const char argp_program_doc[] =
+"XDP features detecion application.\n"
+"\n"
+"XDP features application checks the XDP advertised features match detected ones.\n"
+"\n"
+"USAGE: ./xdp-features [-vt] [-f <xdp-feature>] [-D <dut-data-ip>] [-T <tester-data-ip>] [-C <dut-ctrl-ip>] <iface-name>\n"
+"\n"
+"dut-data-ip, tester-data-ip, dut-ctrl-ip: IPv6 or IPv4-mapped-IPv6 addresses;\n"
+"\n"
+"XDP features\n:"
+"- XDP_PASS\n"
+"- XDP_DROP\n"
+"- XDP_ABORTED\n"
+"- XDP_REDIRECT\n"
+"- XDP_NDO_XMIT\n"
+"- XDP_TX\n";
+
+static const struct argp_option opts[] = {
+	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
+	{ "tester", 't', NULL, 0, "Tester mode" },
+	{ "feature", 'f', "XDP-FEATURE", 0, "XDP feature to test" },
+	{ "dut_data_ip", 'D', "DUT-DATA-IP", 0, "DUT IP data channel" },
+	{ "dut_ctrl_ip", 'C', "DUT-CTRL-IP", 0, "DUT IP control channel" },
+	{ "tester_data_ip", 'T', "TESTER-DATA-IP", 0, "Tester IP data channel" },
+	{},
+};
+
+static int get_xdp_feature(const char *arg)
+{
+	if (!strcmp(arg, "XDP_PASS")) {
+		env.feature.action = XDP_PASS;
+		env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+	} else if (!strcmp(arg, "XDP_DROP")) {
+		env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+		env.feature.action = XDP_DROP;
+	} else if (!strcmp(arg, "XDP_ABORTED")) {
+		env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+		env.feature.action = XDP_ABORTED;
+	} else if (!strcmp(arg, "XDP_TX")) {
+		env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+		env.feature.action = XDP_TX;
+	} else if (!strcmp(arg, "XDP_REDIRECT")) {
+		env.feature.drv_feature = NETDEV_XDP_ACT_REDIRECT;
+		env.feature.action = XDP_REDIRECT;
+	} else if (!strcmp(arg, "XDP_NDO_XMIT")) {
+		env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static char *get_xdp_feature_str(void)
+{
+	switch (env.feature.action) {
+	case XDP_PASS:
+		return YELLOW("XDP_PASS");
+	case XDP_DROP:
+		return YELLOW("XDP_DROP");
+	case XDP_ABORTED:
+		return YELLOW("XDP_ABORTED");
+	case XDP_TX:
+		return YELLOW("XDP_TX");
+	case XDP_REDIRECT:
+		return YELLOW("XDP_REDIRECT");
+	default:
+		break;
+	}
+
+	if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT)
+		return YELLOW("XDP_NDO_XMIT");
+
+	return "";
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case 'v':
+		env.verbosity = true;
+		break;
+	case 't':
+		env.is_tester = true;
+		break;
+	case 'f':
+		if (get_xdp_feature(arg) < 0) {
+			fprintf(stderr, "Invalid xdp feature: %s\n", arg);
+			argp_usage(state);
+			return ARGP_ERR_UNKNOWN;
+		}
+		break;
+	case 'D':
+		if (make_sockaddr(AF_INET6, arg, DUT_ECHO_PORT,
+				  &env.dut_addr, NULL)) {
+			fprintf(stderr, "Invalid DUT address: %s\n", arg);
+			return ARGP_ERR_UNKNOWN;
+		}
+		break;
+	case 'C':
+		if (make_sockaddr(AF_INET6, arg, DUT_CTRL_PORT,
+				  &env.dut_ctrl_addr, NULL)) {
+			fprintf(stderr, "Invalid DUT CTRL address: %s\n", arg);
+			return ARGP_ERR_UNKNOWN;
+		}
+		break;
+	case 'T':
+		if (make_sockaddr(AF_INET6, arg, 0, &env.tester_addr, NULL)) {
+			fprintf(stderr, "Invalid Tester address: %s\n", arg);
+			return ARGP_ERR_UNKNOWN;
+		}
+		break;
+	case ARGP_KEY_ARG:
+		errno = 0;
+		if (strlen(arg) >= IF_NAMESIZE) {
+			fprintf(stderr, "Invalid device name: %s\n", arg);
+			argp_usage(state);
+			return ARGP_ERR_UNKNOWN;
+		}
+
+		env.ifindex = if_nametoindex(arg);
+		if (!env.ifindex)
+			env.ifindex = strtoul(arg, NULL, 0);
+		if (!env.ifindex) {
+			fprintf(stderr,
+				"Bad interface index or name (%d): %s\n",
+				errno, strerror(errno));
+			argp_usage(state);
+			return ARGP_ERR_UNKNOWN;
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+static const struct argp argp = {
+	.options = opts,
+	.parser = parse_arg,
+	.doc = argp_program_doc,
+};
+
+static void set_env_default(void)
+{
+	env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
+	env.feature.action = -EINVAL;
+	env.ifindex = -ENODEV;
+	make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT,
+		      &env.dut_ctrl_addr, NULL);
+	make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT,
+		      &env.dut_addr, NULL);
+	make_sockaddr(AF_INET6, "::ffff:127.0.0.1", 0, &env.tester_addr, NULL);
+}
+
+static void *dut_echo_thread(void *arg)
+{
+	unsigned char buf[sizeof(struct tlv_hdr)];
+	int sockfd = *(int *)arg;
+
+	while (!exiting) {
+		struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+		struct sockaddr_storage addr;
+		socklen_t addrlen;
+		size_t n;
+
+		n = recvfrom(sockfd, buf, sizeof(buf), MSG_WAITALL,
+			     (struct sockaddr *)&addr, &addrlen);
+		if (n != ntohs(tlv->len))
+			continue;
+
+		if (ntohs(tlv->type) != CMD_ECHO)
+			continue;
+
+		sendto(sockfd, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM,
+		       (struct sockaddr *)&addr, addrlen);
+	}
+
+	pthread_exit((void *)0);
+	close(sockfd);
+
+	return NULL;
+}
+
+static int dut_run_echo_thread(pthread_t *t, int *sockfd)
+{
+	int err;
+
+	sockfd = start_reuseport_server(AF_INET6, SOCK_DGRAM, NULL,
+					DUT_ECHO_PORT, 0, 1);
+	if (!sockfd) {
+		fprintf(stderr, "Failed to create echo socket\n");
+		return -errno;
+	}
+
+	/* start echo channel */
+	err = pthread_create(t, NULL, dut_echo_thread, sockfd);
+	if (err) {
+		fprintf(stderr, "Failed creating dut_echo thread: %s\n",
+			strerror(-err));
+		free_fds(sockfd, 1);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int dut_attach_xdp_prog(struct xdp_features *skel, int flags)
+{
+	enum xdp_action action = env.feature.action;
+	struct bpf_program *prog;
+	unsigned int key = 0;
+	int err, fd = 0;
+
+	if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT) {
+		struct bpf_devmap_val entry = {
+			.ifindex = env.ifindex,
+		};
+
+		err = bpf_map__update_elem(skel->maps.dev_map,
+					   &key, sizeof(key),
+					   &entry, sizeof(entry), 0);
+		if (err < 0)
+			return err;
+
+		fd = bpf_program__fd(skel->progs.xdp_do_redirect_cpumap);
+		action = XDP_REDIRECT;
+	}
+
+	switch (action) {
+	case XDP_TX:
+		prog = skel->progs.xdp_do_tx;
+		break;
+	case XDP_DROP:
+		prog = skel->progs.xdp_do_drop;
+		break;
+	case XDP_ABORTED:
+		prog = skel->progs.xdp_do_aborted;
+		break;
+	case XDP_PASS:
+		prog = skel->progs.xdp_do_pass;
+		break;
+	case XDP_REDIRECT: {
+		struct bpf_cpumap_val entry = {
+			.qsize = 2048,
+			.bpf_prog.fd = fd,
+		};
+
+		err = bpf_map__update_elem(skel->maps.cpu_map,
+					   &key, sizeof(key),
+					   &entry, sizeof(entry), 0);
+		if (err < 0)
+			return err;
+
+		prog = skel->progs.xdp_do_redirect;
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
+	if (err)
+		fprintf(stderr,
+			"Failed to attach XDP program to ifindex %d\n",
+			env.ifindex);
+	return err;
+}
+
+static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val,
+		    size_t val_size)
+{
+	struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+	size_t len;
+
+	len = recv(sockfd, buf, bufsize, 0);
+	if (len != ntohs(tlv->len) || len < sizeof(*tlv))
+		return -EINVAL;
+
+	if (val) {
+		len -= sizeof(*tlv);
+		if (len > val_size)
+			return -ENOMEM;
+
+		memcpy(val, tlv->data, len);
+	}
+
+	return 0;
+}
+
+static int dut_run(struct xdp_features *skel)
+{
+	int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
+	int state, err, *sockfd, ctrl_sockfd, echo_sockfd;
+	struct sockaddr_storage ctrl_addr;
+	pthread_t dut_thread;
+	socklen_t addrlen;
+
+	sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL,
+					DUT_CTRL_PORT, 0, 1);
+	if (!sockfd) {
+		fprintf(stderr, "Failed to create DUT socket\n");
+		return -errno;
+	}
+
+	ctrl_sockfd = accept(*sockfd, (struct sockaddr *)&ctrl_addr, &addrlen);
+	if (ctrl_sockfd < 0) {
+		fprintf(stderr, "Failed to accept connection on DUT socket\n");
+		free_fds(sockfd, 1);
+		return -errno;
+	}
+
+	/* CTRL loop */
+	while (!exiting) {
+		unsigned char buf[BUFSIZE] = {};
+		struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+
+		err = recv_msg(ctrl_sockfd, buf, BUFSIZE, NULL, 0);
+		if (err)
+			continue;
+
+		switch (ntohs(tlv->type)) {
+		case CMD_START: {
+			if (state == CMD_START)
+				continue;
+
+			state = CMD_START;
+			/* Load the XDP program on the DUT */
+			err = dut_attach_xdp_prog(skel, flags);
+			if (err)
+				goto out;
+
+			err = dut_run_echo_thread(&dut_thread, &echo_sockfd);
+			if (err < 0)
+				goto out;
+
+			tlv->type = htons(CMD_ACK);
+			tlv->len = htons(sizeof(*tlv));
+			err = send(ctrl_sockfd, buf, sizeof(*tlv), 0);
+			if (err < 0)
+				goto end_thread;
+			break;
+		}
+		case CMD_STOP:
+			if (state != CMD_START)
+				break;
+
+			state = CMD_STOP;
+
+			exiting = true;
+			bpf_xdp_detach(env.ifindex, flags, NULL);
+
+			tlv->type = htons(CMD_ACK);
+			tlv->len = htons(sizeof(*tlv));
+			err = send(ctrl_sockfd, buf, sizeof(*tlv), 0);
+			goto end_thread;
+		case CMD_GET_XDP_CAP: {
+			LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+			unsigned long long val;
+			size_t n;
+
+			err = bpf_xdp_query(env.ifindex, XDP_FLAGS_DRV_MODE,
+					    &opts);
+			if (err) {
+				fprintf(stderr,
+					"Failed to query XDP cap for ifindex %d\n",
+					env.ifindex);
+				goto end_thread;
+			}
+
+			tlv->type = htons(CMD_ACK);
+			n = sizeof(*tlv) + sizeof(opts.feature_flags);
+			tlv->len = htons(n);
+
+			val = htobe64(opts.feature_flags);
+			memcpy(tlv->data, &val, sizeof(val));
+
+			err = send(ctrl_sockfd, buf, n, 0);
+			if (err < 0)
+				goto end_thread;
+			break;
+		}
+		case CMD_GET_STATS: {
+			unsigned int key = 0, val;
+			size_t n;
+
+			err = bpf_map__lookup_elem(skel->maps.dut_stats,
+						   &key, sizeof(key),
+						   &val, sizeof(val), 0);
+			if (err) {
+				fprintf(stderr, "bpf_map_lookup_elem failed\n");
+				goto end_thread;
+			}
+
+			tlv->type = htons(CMD_ACK);
+			n = sizeof(*tlv) + sizeof(val);
+			tlv->len = htons(n);
+
+			val = htonl(val);
+			memcpy(tlv->data, &val, sizeof(val));
+
+			err = send(ctrl_sockfd, buf, n, 0);
+			if (err < 0)
+				goto end_thread;
+			break;
+		}
+		default:
+			break;
+		}
+	}
+
+end_thread:
+	pthread_join(dut_thread, NULL);
+out:
+	bpf_xdp_detach(env.ifindex, flags, NULL);
+	close(ctrl_sockfd);
+	free_fds(sockfd, 1);
+
+	return err;
+}
+
+static bool tester_collect_detected_cap(struct xdp_features *skel,
+					unsigned int dut_stats)
+{
+	unsigned int err, key = 0, val;
+
+	if (!dut_stats)
+		return false;
+
+	err = bpf_map__lookup_elem(skel->maps.stats, &key, sizeof(key),
+				   &val, sizeof(val), 0);
+	if (err) {
+		fprintf(stderr, "bpf_map_lookup_elem failed\n");
+		return false;
+	}
+
+	switch (env.feature.action) {
+	case XDP_PASS:
+	case XDP_TX:
+	case XDP_REDIRECT:
+		return val > 0;
+	case XDP_DROP:
+	case XDP_ABORTED:
+		return val == 0;
+	default:
+		break;
+	}
+
+	if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT)
+		return val > 0;
+
+	return false;
+}
+
+static int send_and_recv_msg(int sockfd, enum test_commands cmd, void *val,
+			     size_t val_size)
+{
+	unsigned char buf[BUFSIZE] = {};
+	struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+	int err;
+
+	tlv->type = htons(cmd);
+	tlv->len = htons(sizeof(*tlv));
+
+	err = send(sockfd, buf, sizeof(*tlv), 0);
+	if (err < 0)
+		return err;
+
+	err = recv_msg(sockfd, buf, BUFSIZE, val, val_size);
+	if (err < 0)
+		return err;
+
+	return ntohs(tlv->type) == CMD_ACK ? 0 : -EINVAL;
+}
+
+static int send_echo_msg(void)
+{
+	unsigned char buf[sizeof(struct tlv_hdr)];
+	struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+	int sockfd, n;
+
+	sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+	if (sockfd < 0) {
+		fprintf(stderr, "Failed to create echo socket\n");
+		return -errno;
+	}
+
+	tlv->type = htons(CMD_ECHO);
+	tlv->len = htons(sizeof(*tlv));
+
+	n = sendto(sockfd, buf, sizeof(*tlv), MSG_NOSIGNAL | MSG_CONFIRM,
+		   (struct sockaddr *)&env.dut_addr, sizeof(env.dut_addr));
+	close(sockfd);
+
+	return n == ntohs(tlv->len) ? 0 : -EINVAL;
+}
+
+static int tester_run(struct xdp_features *skel)
+{
+	int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
+	unsigned long long advertised_feature;
+	struct bpf_program *prog;
+	unsigned int stats;
+	int i, err, sockfd;
+	bool detected_cap;
+
+	sockfd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (sockfd < 0) {
+		fprintf(stderr, "Failed to create tester socket\n");
+		return -errno;
+	}
+
+	if (settimeo(sockfd, 1000) < 0)
+		return -EINVAL;
+
+	err = connect(sockfd, (struct sockaddr *)&env.dut_ctrl_addr,
+		      sizeof(env.dut_ctrl_addr));
+	if (err) {
+		fprintf(stderr, "Failed to connect to the DUT\n");
+		return -errno;
+	}
+
+	err = send_and_recv_msg(sockfd, CMD_GET_XDP_CAP, &advertised_feature,
+				sizeof(advertised_feature));
+	if (err < 0) {
+		close(sockfd);
+		return err;
+	}
+
+	advertised_feature = be64toh(advertised_feature);
+
+	if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT ||
+	    env.feature.action == XDP_TX)
+		prog = skel->progs.xdp_tester_check_tx;
+	else
+		prog = skel->progs.xdp_tester_check_rx;
+
+	err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
+	if (err) {
+		fprintf(stderr, "Failed to attach XDP program to ifindex %d\n",
+			env.ifindex);
+		goto out;
+	}
+
+	err = send_and_recv_msg(sockfd, CMD_START, NULL, 0);
+	if (err)
+		goto out;
+
+	for (i = 0; i < 10 && !exiting; i++) {
+		err = send_echo_msg();
+		if (err < 0)
+			goto out;
+
+		sleep(1);
+	}
+
+	err = send_and_recv_msg(sockfd, CMD_GET_STATS, &stats, sizeof(stats));
+	if (err)
+		goto out;
+
+	/* stop the test */
+	err = send_and_recv_msg(sockfd, CMD_STOP, NULL, 0);
+	/* send a new echo message to wake echo thread of the dut */
+	send_echo_msg();
+
+	detected_cap = tester_collect_detected_cap(skel, ntohl(stats));
+
+	fprintf(stdout, "Feature %s: [%s][%s]\n", get_xdp_feature_str(),
+		detected_cap ? GREEN("DETECTED") : RED("NOT DETECTED"),
+		env.feature.drv_feature & advertised_feature ? GREEN("ADVERTISED")
+							     : RED("NOT ADVERTISED"));
+out:
+	bpf_xdp_detach(env.ifindex, flags, NULL);
+	close(sockfd);
+	return err < 0 ? err : 0;
+}
+
+int main(int argc, char **argv)
+{
+	struct xdp_features *skel;
+	int err;
+
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	libbpf_set_print(libbpf_print_fn);
+
+	signal(SIGINT, sig_handler);
+	signal(SIGTERM, sig_handler);
+
+	set_env_default();
+
+	/* Parse command line arguments */
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	if (env.ifindex < 0) {
+		fprintf(stderr, "Invalid ifindex\n");
+		return -ENODEV;
+	}
+
+	/* Load and verify BPF application */
+	skel = xdp_features__open();
+	if (!skel) {
+		fprintf(stderr, "Failed to open and load BPF skeleton\n");
+		return -EINVAL;
+	}
+
+	skel->rodata->tester_addr =
+		((struct sockaddr_in6 *)&env.tester_addr)->sin6_addr;
+	skel->rodata->dut_addr =
+		((struct sockaddr_in6 *)&env.dut_addr)->sin6_addr;
+
+	/* Load & verify BPF programs */
+	err = xdp_features__load(skel);
+	if (err) {
+		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+		goto cleanup;
+	}
+
+	err = xdp_features__attach(skel);
+	if (err) {
+		fprintf(stderr, "Failed to attach BPF skeleton\n");
+		goto cleanup;
+	}
+
+	if (env.is_tester) {
+		/* Tester */
+		fprintf(stdout, "Starting tester on device %d\n", env.ifindex);
+		err = tester_run(skel);
+	} else {
+		/* DUT */
+		fprintf(stdout, "Starting DUT on device %d\n", env.ifindex);
+		err = dut_run(skel);
+	}
+
+cleanup:
+	xdp_features__destroy(skel);
+
+	return err < 0 ? -err : 0;
+}
diff --git a/tools/testing/selftests/bpf/xdp_features.h b/tools/testing/selftests/bpf/xdp_features.h
new file mode 100644
index 000000000000..2670c541713b
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_features.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* test commands */
+enum test_commands {
+	CMD_STOP,		/* CMD */
+	CMD_START,		/* CMD */
+	CMD_ECHO,		/* CMD */
+	CMD_ACK,		/* CMD + data */
+	CMD_GET_XDP_CAP,	/* CMD */
+	CMD_GET_STATS,		/* CMD */
+};
+
+#define DUT_CTRL_PORT	12345
+#define DUT_ECHO_PORT	12346
+
+struct tlv_hdr {
+	__be16 type;
+	__be16 len;
+	__u8 data[];
+};
-- 
cgit v1.2.3


From 344dd2c9e74373cca454dcea4b62be50adc35939 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:27 +0100
Subject: selftests: forwarding: Move IGMP- and MLD-related functions to lib

These functions will be helpful for other testsuites as well. Extract them
to a common place.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/forwarding/bridge_mdb.sh | 49 ----------------------
 tools/testing/selftests/net/forwarding/lib.sh      | 49 ++++++++++++++++++++++
 2 files changed, 49 insertions(+), 49 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 2fa5973c0c28..51f2b0d77067 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -1018,26 +1018,6 @@ fwd_test()
 	ip -6 address del fe80::1/64 dev br0
 }
 
-igmpv3_is_in_get()
-{
-	local igmpv3
-
-	igmpv3=$(:
-		)"22:"$(			: Type - Membership Report
-		)"00:"$(			: Reserved
-		)"2a:f8:"$(			: Checksum
-		)"00:00:"$(			: Reserved
-		)"00:01:"$(			: Number of Group Records
-		)"01:"$(			: Record Type - IS_IN
-		)"00:"$(			: Aux Data Len
-		)"00:01:"$(			: Number of Sources
-		)"ef:01:01:01:"$(		: Multicast Address - 239.1.1.1
-		)"c0:00:02:02"$(		: Source Address - 192.0.2.2
-		)
-
-	echo $igmpv3
-}
-
 ctrl_igmpv3_is_in_test()
 {
 	RET=0
@@ -1077,35 +1057,6 @@ ctrl_igmpv3_is_in_test()
 	log_test "IGMPv3 MODE_IS_INCLUE tests"
 }
 
-mldv2_is_in_get()
-{
-	local hbh
-	local icmpv6
-
-	hbh=$(:
-		)"3a:"$(			: Next Header - ICMPv6
-		)"00:"$(			: Hdr Ext Len
-		)"00:00:00:00:00:00:"$(		: Options and Padding
-		)
-
-	icmpv6=$(:
-		)"8f:"$(			: Type - MLDv2 Report
-		)"00:"$(			: Code
-		)"45:39:"$(			: Checksum
-		)"00:00:"$(			: Reserved
-		)"00:01:"$(			: Number of Group Records
-		)"01:"$(			: Record Type - IS_IN
-		)"00:"$(			: Aux Data Len
-		)"00:01:"$(			: Number of Sources
-		)"ff:0e:00:00:00:00:00:00:"$(	: Multicast address - ff0e::1
-		)"00:00:00:00:00:00:00:01:"$(	:
-		)"20:01:0d:b8:00:01:00:00:"$(	: Source Address - 2001:db8:1::2
-		)"00:00:00:00:00:00:00:02:"$(	:
-		)
-
-	echo ${hbh}${icmpv6}
-}
-
 ctrl_mldv2_is_in_test()
 {
 	RET=0
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 29cd4705c752..736f56098cc7 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1671,3 +1671,52 @@ hw_stats_monitor_test()
 
 	log_test "${type}_stats notifications"
 }
+
+igmpv3_is_in_get()
+{
+	local igmpv3
+
+	igmpv3=$(:
+		)"22:"$(			: Type - Membership Report
+		)"00:"$(			: Reserved
+		)"2a:f8:"$(			: Checksum
+		)"00:00:"$(			: Reserved
+		)"00:01:"$(			: Number of Group Records
+		)"01:"$(			: Record Type - IS_IN
+		)"00:"$(			: Aux Data Len
+		)"00:01:"$(			: Number of Sources
+		)"ef:01:01:01:"$(		: Multicast Address - 239.1.1.1
+		)"c0:00:02:02"$(		: Source Address - 192.0.2.2
+		)
+
+	echo $igmpv3
+}
+
+mldv2_is_in_get()
+{
+	local hbh
+	local icmpv6
+
+	hbh=$(:
+		)"3a:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)
+
+	icmpv6=$(:
+		)"8f:"$(			: Type - MLDv2 Report
+		)"00:"$(			: Code
+		)"45:39:"$(			: Checksum
+		)"00:00:"$(			: Reserved
+		)"00:01:"$(			: Number of Group Records
+		)"01:"$(			: Record Type - IS_IN
+		)"00:"$(			: Aux Data Len
+		)"00:01:"$(			: Number of Sources
+		)"ff:0e:00:00:00:00:00:00:"$(	: Multicast address - ff0e::1
+		)"00:00:00:00:00:00:00:01:"$(	:
+		)"20:01:0d:b8:00:01:00:00:"$(	: Source Address - 2001:db8:1::2
+		)"00:00:00:00:00:00:00:02:"$(	:
+		)
+
+	echo ${hbh}${icmpv6}
+}
-- 
cgit v1.2.3


From f7ccf60c4adada7e13d3d6798621cabcdaf3828e Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:28 +0100
Subject: selftests: forwarding: bridge_mdb: Fix a typo

Add the letter missing from the word "INCLUDE".

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/bridge_mdb.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 51f2b0d77067..4e16677f02ba 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -1054,7 +1054,7 @@ ctrl_igmpv3_is_in_test()
 
 	bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
 
-	log_test "IGMPv3 MODE_IS_INCLUE tests"
+	log_test "IGMPv3 MODE_IS_INCLUDE tests"
 }
 
 ctrl_mldv2_is_in_test()
-- 
cgit v1.2.3


From fcf4927632eecb25e40c8c5ac1e40df2090cc2eb Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:29 +0100
Subject: selftests: forwarding: lib: Add helpers for IP address handling

In order to generate IGMPv3 and MLDv2 packets on the fly, we will need
helpers to expand IPv4 and IPv6 addresses given as parameters in
mausezahn payload notation. Add helpers that do it.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 37 +++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 736f56098cc7..9fe3004af6dc 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1672,6 +1672,43 @@ hw_stats_monitor_test()
 	log_test "${type}_stats notifications"
 }
 
+ipv4_to_bytes()
+{
+	local IP=$1; shift
+
+	printf '%02x:' ${IP//./ } |
+	    sed 's/:$//'
+}
+
+# Convert a given IPv6 address, `IP' such that the :: token, if present, is
+# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal
+# digits. An optional `BYTESEP' parameter can be given to further separate
+# individual bytes of each 16-bit group.
+expand_ipv6()
+{
+	local IP=$1; shift
+	local bytesep=$1; shift
+
+	local cvt_ip=${IP/::/_}
+	local colons=${cvt_ip//[^:]/}
+	local allcol=:::::::
+	# IP where :: -> the appropriate number of colons:
+	local allcol_ip=${cvt_ip/_/${allcol:${#colons}}}
+
+	echo $allcol_ip | tr : '\n' |
+	    sed s/^/0000/ |
+	    sed 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
+	    tr '\n' : |
+	    sed 's/:$//'
+}
+
+ipv6_to_bytes()
+{
+	local IP=$1; shift
+
+	expand_ipv6 "$IP" :
+}
+
 igmpv3_is_in_get()
 {
 	local igmpv3
-- 
cgit v1.2.3


From 952e0ee38c7215c45192d8c899acd1830873f28b Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:30 +0100
Subject: selftests: forwarding: lib: Add helpers for checksum handling

In order to generate IGMPv3 and MLDv2 packets on the fly, we will need
helpers to calculate the packet checksum.

The approach presented in this patch revolves around payload templates
for mausezahn. These are mausezahn-like payload strings (01:23:45:...)
with possibly one 2-byte sequence replaced with the word PAYLOAD. The
main function is payload_template_calc_checksum(), which calculates
RFC 1071 checksum of the message. There are further helpers to then
convert the checksum to the payload format, and to expand it.

For IPv6, MLDv2 message checksum is computed using a pseudoheader that
differs from the header used in the payload itself. The fact that the
two messages are different means that the checksum needs to be
returned as a separate quantity, instead of being expanded in-place in
the payload itself. Furthermore, the pseudoheader includes a length of
the message. Much like the checksum, this needs to be expanded in
mausezahn format. And likewise for number of addresses for (S,G)
entries. Thus we have several places where a computed quantity needs
to be presented in the payload format. Add a helper u16_to_bytes(),
which will be used in all these cases.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 56 +++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 9fe3004af6dc..733bee34631a 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1709,6 +1709,62 @@ ipv6_to_bytes()
 	expand_ipv6 "$IP" :
 }
 
+u16_to_bytes()
+{
+	local u16=$1; shift
+
+	printf "%04x" $u16 | sed 's/^/000/;s/^.*\(..\)\(..\)$/\1:\2/'
+}
+
+# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
+# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
+# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
+# stands for 00:00.
+payload_template_calc_checksum()
+{
+	local payload=$1; shift
+
+	(
+	    # Set input radix.
+	    echo "16i"
+	    # Push zero for the initial checksum.
+	    echo 0
+
+	    # Pad the payload with a terminating 00: in case we get an odd
+	    # number of bytes.
+	    echo "${payload%:}:00:" |
+		sed 's/CHECKSUM/00:00/g' |
+		tr '[:lower:]' '[:upper:]' |
+		# Add the word to the checksum.
+		sed 's/\(..\):\(..\):/\1\2+\n/g' |
+		# Strip the extra odd byte we pushed if left unconverted.
+		sed 's/\(..\):$//'
+
+	    echo "10000 ~ +"	# Calculate and add carry.
+	    echo "FFFF r - p"	# Bit-flip and print.
+	) |
+	    dc |
+	    tr '[:upper:]' '[:lower:]'
+}
+
+payload_template_expand_checksum()
+{
+	local payload=$1; shift
+	local checksum=$1; shift
+
+	local ckbytes=$(u16_to_bytes $checksum)
+
+	echo "$payload" | sed "s/CHECKSUM/$ckbytes/g"
+}
+
+payload_template_nbytes()
+{
+	local payload=$1; shift
+
+	payload_template_expand_checksum "${payload%:}" 0 |
+		sed 's/:/\n/g' | wc -l
+}
+
 igmpv3_is_in_get()
 {
 	local igmpv3
-- 
cgit v1.2.3


From 506a1ac9d32b52606f59acbb0e20c1a7adbb8511 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:31 +0100
Subject: selftests: forwarding: lib: Parameterize IGMPv3/MLDv2 generation

In order to generate IGMPv3 and MLDv2 packets on the fly, the
functions that generate these packets need to be able to generate
packets for different groups and different sources. Generating MLDv2
packets further needs the source address of the packet for purposes of
checksum calculation. Add the necessary parameters, and generate the
payload accordingly by dispatching to helpers added in the previous
patches.

Adjust the sole client, bridge_mdb.sh, as well.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/forwarding/bridge_mdb.sh |  9 +++---
 tools/testing/selftests/net/forwarding/lib.sh      | 36 ++++++++++++++++------
 2 files changed, 31 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 4e16677f02ba..b48867d8cadf 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -1029,7 +1029,7 @@ ctrl_igmpv3_is_in_test()
 
 	# IS_IN ( 192.0.2.2 )
 	$MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-		-t ip proto=2,p=$(igmpv3_is_in_get) -q
+		-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
 	bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
 	check_fail $? "Permanent entry affected by IGMP packet"
@@ -1042,7 +1042,7 @@ ctrl_igmpv3_is_in_test()
 
 	# IS_IN ( 192.0.2.2 )
 	$MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-		-t ip proto=2,p=$(igmpv3_is_in_get) -q
+		-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
 	bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
 		grep -q 192.0.2.2
@@ -1067,8 +1067,9 @@ ctrl_mldv2_is_in_test()
 		filter_mode include source_list 2001:db8:1::1
 
 	# IS_IN ( 2001:db8:1::2 )
+	local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
 	$MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
-		-t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+		-t ip hop=1,next=0,p="$p" -q
 
 	bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
 		grep -q 2001:db8:1::2
@@ -1082,7 +1083,7 @@ ctrl_mldv2_is_in_test()
 
 	# IS_IN ( 2001:db8:1::2 )
 	$MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
-		-t ip hop=1,next=0,p=$(mldv2_is_in_get) -q
+		-t ip hop=1,next=0,p="$p" -q
 
 	bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
 		grep -q 2001:db8:1::2
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 733bee34631a..4896f21493c8 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1767,26 +1767,35 @@ payload_template_nbytes()
 
 igmpv3_is_in_get()
 {
+	local GRP=$1; shift
+	local IP=$1; shift
+
 	local igmpv3
 
+	# IS_IN ( $IP )
 	igmpv3=$(:
 		)"22:"$(			: Type - Membership Report
 		)"00:"$(			: Reserved
-		)"2a:f8:"$(			: Checksum
+		)"CHECKSUM:"$(			: Checksum
 		)"00:00:"$(			: Reserved
 		)"00:01:"$(			: Number of Group Records
 		)"01:"$(			: Record Type - IS_IN
 		)"00:"$(			: Aux Data Len
 		)"00:01:"$(			: Number of Sources
-		)"ef:01:01:01:"$(		: Multicast Address - 239.1.1.1
-		)"c0:00:02:02"$(		: Source Address - 192.0.2.2
+		)"$(ipv4_to_bytes $GRP):"$(	: Multicast Address
+		)"$(ipv4_to_bytes $IP)"$(	: Source Address
 		)
+	local checksum=$(payload_template_calc_checksum "$igmpv3")
 
-	echo $igmpv3
+	payload_template_expand_checksum "$igmpv3" $checksum
 }
 
 mldv2_is_in_get()
 {
+	local SIP=$1; shift
+	local GRP=$1; shift
+	local IP=$1; shift
+
 	local hbh
 	local icmpv6
 
@@ -1799,17 +1808,24 @@ mldv2_is_in_get()
 	icmpv6=$(:
 		)"8f:"$(			: Type - MLDv2 Report
 		)"00:"$(			: Code
-		)"45:39:"$(			: Checksum
+		)"CHECKSUM:"$(			: Checksum
 		)"00:00:"$(			: Reserved
 		)"00:01:"$(			: Number of Group Records
 		)"01:"$(			: Record Type - IS_IN
 		)"00:"$(			: Aux Data Len
 		)"00:01:"$(			: Number of Sources
-		)"ff:0e:00:00:00:00:00:00:"$(	: Multicast address - ff0e::1
-		)"00:00:00:00:00:00:00:01:"$(	:
-		)"20:01:0d:b8:00:01:00:00:"$(	: Source Address - 2001:db8:1::2
-		)"00:00:00:00:00:00:00:02:"$(	:
+		)"$(ipv6_to_bytes $GRP):"$(	: Multicast address
+		)"$(ipv6_to_bytes $IP):"$(	: Source Address
 		)
 
-	echo ${hbh}${icmpv6}
+	local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+	local sudohdr=$(:
+		)"$(ipv6_to_bytes $SIP):"$(	: SIP
+		)"$(ipv6_to_bytes $GRP):"$(	: DIP is multicast address
+	        )"${len}:"$(			: Upper-layer length
+	        )"00:3a:"$(			: Zero and next-header
+	        )
+	local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+	payload_template_expand_checksum "$hbh$icmpv6" $checksum
 }
-- 
cgit v1.2.3


From 705d4bc7b6b6563e7c2d4b3b3da8da9af842aacc Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:32 +0100
Subject: selftests: forwarding: lib: Allow list of IPs for IGMPv3/MLDv2

The testsuite that checks for mcast_max_groups functionality will need
to generate IGMP and MLD packets with configurable number of (S,G)
addresses. To that end, further extend igmpv3_is_in_get() and
mldv2_is_in_get() to allow a list of IP addresses instead of one
address.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 4896f21493c8..df6b3208e236 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1768,11 +1768,12 @@ payload_template_nbytes()
 igmpv3_is_in_get()
 {
 	local GRP=$1; shift
-	local IP=$1; shift
+	local sources=("$@")
 
 	local igmpv3
+	local nsources=$(u16_to_bytes ${#sources[@]})
 
-	# IS_IN ( $IP )
+	# IS_IN ( $sources )
 	igmpv3=$(:
 		)"22:"$(			: Type - Membership Report
 		)"00:"$(			: Reserved
@@ -1781,9 +1782,12 @@ igmpv3_is_in_get()
 		)"00:01:"$(			: Number of Group Records
 		)"01:"$(			: Record Type - IS_IN
 		)"00:"$(			: Aux Data Len
-		)"00:01:"$(			: Number of Sources
+		)"${nsources}:"$(		: Number of Sources
 		)"$(ipv4_to_bytes $GRP):"$(	: Multicast Address
-		)"$(ipv4_to_bytes $IP)"$(	: Source Address
+		)"$(for src in "${sources[@]}"; do
+			ipv4_to_bytes $src
+			echo -n :
+		    done)"$(			: Source Addresses
 		)
 	local checksum=$(payload_template_calc_checksum "$igmpv3")
 
@@ -1794,10 +1798,11 @@ mldv2_is_in_get()
 {
 	local SIP=$1; shift
 	local GRP=$1; shift
-	local IP=$1; shift
+	local sources=("$@")
 
 	local hbh
 	local icmpv6
+	local nsources=$(u16_to_bytes ${#sources[@]})
 
 	hbh=$(:
 		)"3a:"$(			: Next Header - ICMPv6
@@ -1813,9 +1818,12 @@ mldv2_is_in_get()
 		)"00:01:"$(			: Number of Group Records
 		)"01:"$(			: Record Type - IS_IN
 		)"00:"$(			: Aux Data Len
-		)"00:01:"$(			: Number of Sources
+		)"${nsources}:"$(		: Number of Sources
 		)"$(ipv6_to_bytes $GRP):"$(	: Multicast address
-		)"$(ipv6_to_bytes $IP):"$(	: Source Address
+		)"$(for src in "${sources[@]}"; do
+			ipv6_to_bytes $src
+			echo -n :
+		    done)"$(			: Source Addresses
 		)
 
 	local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
-- 
cgit v1.2.3


From 9ae8546973171230488ad631d2b9eb844466a37d Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:33 +0100
Subject: selftests: forwarding: lib: Add helpers to build IGMP/MLD leave
 packets

The testsuite that checks for mcast_max_groups functionality will need to
wipe the added groups as well. Add helpers to build an IGMP or MLD packets
announcing that host is leaving a given group.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh | 50 +++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index df6b3208e236..cc7c4f89a097 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1794,6 +1794,21 @@ igmpv3_is_in_get()
 	payload_template_expand_checksum "$igmpv3" $checksum
 }
 
+igmpv2_leave_get()
+{
+	local GRP=$1; shift
+
+	local payload=$(:
+		)"17:"$(			: Type - Leave Group
+		)"00:"$(			: Max Resp Time - not meaningful
+		)"CHECKSUM:"$(			: Checksum
+		)"$(ipv4_to_bytes $GRP)"$(	: Group Address
+		)
+	local checksum=$(payload_template_calc_checksum "$payload")
+
+	payload_template_expand_checksum "$payload" $checksum
+}
+
 mldv2_is_in_get()
 {
 	local SIP=$1; shift
@@ -1837,3 +1852,38 @@ mldv2_is_in_get()
 
 	payload_template_expand_checksum "$hbh$icmpv6" $checksum
 }
+
+mldv1_done_get()
+{
+	local SIP=$1; shift
+	local GRP=$1; shift
+
+	local hbh
+	local icmpv6
+
+	hbh=$(:
+		)"3a:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)
+
+	icmpv6=$(:
+		)"84:"$(			: Type - MLDv1 Done
+		)"00:"$(			: Code
+		)"CHECKSUM:"$(			: Checksum
+		)"00:00:"$(			: Max Resp Delay - not meaningful
+		)"00:00:"$(			: Reserved
+		)"$(ipv6_to_bytes $GRP):"$(	: Multicast address
+		)
+
+	local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+	local sudohdr=$(:
+		)"$(ipv6_to_bytes $SIP):"$(	: SIP
+		)"$(ipv6_to_bytes $GRP):"$(	: DIP is multicast address
+	        )"${len}:"$(			: Upper-layer length
+	        )"00:3a:"$(			: Zero and next-header
+	        )
+	local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+	payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}
-- 
cgit v1.2.3


From 3446dcd7df05ed6cc0095f21f871a61f1f28ed4e Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 2 Feb 2023 18:59:34 +0100
Subject: selftests: forwarding: bridge_mdb_max: Add a new selftest

Add a suite covering mcast_n_groups and mcast_max_groups bridge features.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/Makefile    |    1 +
 .../selftests/net/forwarding/bridge_mdb_max.sh     | 1336 ++++++++++++++++++++
 2 files changed, 1337 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_mdb_max.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 453ae006fbcf..91201ab3c4fc 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -4,6 +4,7 @@ TEST_PROGS = bridge_igmp.sh \
 	bridge_locked_port.sh \
 	bridge_mdb.sh \
 	bridge_mdb_host.sh \
+	bridge_mdb_max.sh \
 	bridge_mdb_port_down.sh \
 	bridge_mld.sh \
 	bridge_port_isolation.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
new file mode 100755
index 000000000000..ae255b662ba3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -0,0 +1,1336 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# | + $h1.10              |                          | + $h2.10               |
+# | | 192.0.2.1/28        |                          | | 192.0.2.2/28         |
+# | | 2001:db8:1::1/64    |                          | | 2001:db8:1::2/64     |
+# | |                     |                          | |                      |
+# | |  + $h1.20           |                          | |  + $h2.20            |
+# | \  | 198.51.100.1/24  |                          | \  | 198.51.100.2/24   |
+# |  \ | 2001:db8:2::1/64 |                          |  \ | 2001:db8:2::2/64  |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR0 (802.1q)             + $swp2           | |
+# | |     vid 10                                             vid 10         | |
+# | |     vid 20                                             vid 20         | |
+# | |                                                                       | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_8021d
+	test_8021q
+	test_8021qvs
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.2/28
+	vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+switch_create_8021d()
+{
+	log_info "802.1d tests"
+
+	ip link add name br0 type bridge vlan_filtering 0 \
+		mcast_snooping 1 \
+		mcast_igmp_version 3 mcast_mld_version 2
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp1 up
+	bridge link set dev $swp1 fastleave on
+
+	ip link set dev $swp2 master br0
+	ip link set dev $swp2 up
+}
+
+switch_create_8021q()
+{
+	local br_flags=$1; shift
+
+	log_info "802.1q $br_flags${br_flags:+ }tests"
+
+	ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 1 $br_flags \
+		mcast_igmp_version 3 mcast_mld_version 2
+	bridge vlan add vid 10 dev br0 self
+	bridge vlan add vid 20 dev br0 self
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp1 up
+	bridge link set dev $swp1 fastleave on
+	bridge vlan add vid 10 dev $swp1
+	bridge vlan add vid 20 dev $swp1
+
+	ip link set dev $swp2 master br0
+	ip link set dev $swp2 up
+	bridge vlan add vid 10 dev $swp2
+	bridge vlan add vid 20 dev $swp2
+}
+
+switch_create_8021qvs()
+{
+	switch_create_8021q "mcast_vlan_snooping 1"
+	bridge vlan global set dev br0 vid 10 mcast_igmp_version 3
+	bridge vlan global set dev br0 vid 10 mcast_mld_version 2
+	bridge vlan global set dev br0 vid 20 mcast_igmp_version 3
+	bridge vlan global set dev br0 vid 20 mcast_mld_version 2
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev br0 down
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy 2>/dev/null
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+cfg_src_list()
+{
+	local IPs=("$@")
+	local IPstr=$(echo ${IPs[@]} | tr '[:space:]' , | sed 's/,$//')
+
+	echo ${IPstr:+source_list }${IPstr}
+}
+
+cfg_group_op()
+{
+	local op=$1; shift
+	local locus=$1; shift
+	local GRP=$1; shift
+	local state=$1; shift
+	local IPs=("$@")
+
+	local source_list=$(cfg_src_list ${IPs[@]})
+
+	# Everything besides `bridge mdb' uses the "dev X vid Y" syntax,
+	# so we use it here as well and convert.
+	local br_locus=$(echo "$locus" | sed 's/^dev /port /')
+
+	bridge mdb $op dev br0 $br_locus grp $GRP $state \
+	       filter_mode include $source_list
+}
+
+cfg4_entries_op()
+{
+	local op=$1; shift
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local GRP=239.1.1.${grp}
+	local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+	cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg4_entries_add()
+{
+	cfg4_entries_op add "$@"
+}
+
+cfg4_entries_del()
+{
+	cfg4_entries_op del "$@"
+}
+
+cfg6_entries_op()
+{
+	local op=$1; shift
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local GRP=ff0e::${grp}
+	local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+	cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg6_entries_add()
+{
+	cfg6_entries_op add "$@"
+}
+
+cfg6_entries_del()
+{
+	cfg6_entries_op del "$@"
+}
+
+locus_dev_peer()
+{
+	local dev_kw=$1; shift
+	local dev=$1; shift
+	local vid_kw=$1; shift
+	local vid=$1; shift
+
+	echo "$h1.${vid:-10}"
+}
+
+locus_dev()
+{
+	local dev_kw=$1; shift
+	local dev=$1; shift
+
+	echo $dev
+}
+
+ctl4_entries_add()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+	local peer=$(locus_dev_peer $locus)
+	local GRP=239.1.1.${grp}
+	$MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+		-t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
+	sleep 1
+
+	local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+	if ((nn != n)); then
+		echo mcast_max_groups > /dev/stderr
+		false
+	fi
+}
+
+ctl4_entries_del()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local peer=$(locus_dev_peer $locus)
+	local GRP=239.1.1.${grp}
+	$MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+		-t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
+	sleep 1
+	! bridge mdb show dev br0 | grep -q $GRP
+}
+
+ctl6_entries_add()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+	local peer=$(locus_dev_peer $locus)
+	local SIP=fe80::1
+	local GRP=ff0e::${grp}
+	local p=$(mldv2_is_in_get $SIP $GRP $IPs)
+	$MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+	sleep 1
+
+	local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+	if ((nn != n)); then
+		echo mcast_max_groups > /dev/stderr
+		false
+	fi
+}
+
+ctl6_entries_del()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local peer=$(locus_dev_peer $locus)
+	local SIP=fe80::1
+	local GRP=ff0e::${grp}
+	local p=$(mldv1_done_get $SIP $GRP)
+	$MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+	sleep 1
+	! bridge mdb show dev br0 | grep -q $GRP
+}
+
+bridge_maxgroups_errmsg_check_cfg()
+{
+	local msg=$1; shift
+	local needle=$1; shift
+
+	echo "$msg" | grep -q mcast_max_groups
+	check_err $? "Adding MDB entries failed for the wrong reason: $msg"
+}
+
+bridge_maxgroups_errmsg_check_cfg4()
+{
+	bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_cfg6()
+{
+	bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_ctl4()
+{
+	:
+}
+
+bridge_maxgroups_errmsg_check_ctl6()
+{
+	:
+}
+
+bridge_port_ngroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d link show $locus |
+	    jq '.[].mcast_n_groups'
+}
+
+bridge_port_maxgroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d link show $locus |
+	    jq '.[].mcast_max_groups'
+}
+
+bridge_port_maxgroups_set()
+{
+	local locus=$1; shift
+	local max=$1; shift
+
+	bridge link set dev $(locus_dev $locus) mcast_max_groups $max
+}
+
+bridge_port_vlan_ngroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d vlan show $locus |
+	    jq '.[].vlans[].mcast_n_groups'
+}
+
+bridge_port_vlan_maxgroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d vlan show $locus |
+	    jq '.[].vlans[].mcast_max_groups'
+}
+
+bridge_port_vlan_maxgroups_set()
+{
+	local locus=$1; shift
+	local max=$1; shift
+
+	bridge vlan set $locus mcast_max_groups $max
+}
+
+test_ngroups_reporting()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+
+	local n0=$(bridge_${context}_ngroups_get "$locus")
+	${CFG}_entries_add "$locus" temp 5
+	check_err $? "Couldn't add MDB entries"
+	local n1=$(bridge_${context}_ngroups_get "$locus")
+
+	((n1 == n0 + 5))
+	check_err $? "Number of groups was $n0, now is $n1, but $((n0 + 5)) expected"
+
+	${CFG}_entries_del "$locus" temp 5
+	check_err $? "Couldn't delete MDB entries"
+	local n2=$(bridge_${context}_ngroups_get "$locus")
+
+	((n2 == n0))
+	check_err $? "Number of groups was $n0, now is $n2, but should be back to $n0"
+
+	log_test "$CFG: $context: ngroups reporting"
+}
+
+test_8021d_ngroups_reporting_cfg4()
+{
+	test_ngroups_reporting cfg4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl4()
+{
+	test_ngroups_reporting ctl4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_cfg6()
+{
+	test_ngroups_reporting cfg6 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl6()
+{
+	test_ngroups_reporting ctl6 port "dev $swp1"
+}
+
+test_8021q_ngroups_reporting_cfg4()
+{
+	test_ngroups_reporting cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl4()
+{
+	test_ngroups_reporting ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_cfg6()
+{
+	test_ngroups_reporting cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl6()
+{
+	test_ngroups_reporting ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg4()
+{
+	test_ngroups_reporting cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl4()
+{
+	test_ngroups_reporting ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg6()
+{
+	test_ngroups_reporting cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl6()
+{
+	test_ngroups_reporting ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_ngroups_cross_vlan()
+{
+	local CFG=$1; shift
+
+	local locus1="dev $swp1 vid 10"
+	local locus2="dev $swp1 vid 20"
+
+	RET=0
+
+	local n10=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n20=$(bridge_port_vlan_ngroups_get "$locus2")
+	${CFG}_entries_add "$locus1" temp 5 111
+	check_err $? "Couldn't add MDB entries to VLAN 10"
+	local n11=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n21=$(bridge_port_vlan_ngroups_get "$locus2")
+
+	((n11 == n10 + 5))
+	check_err $? "Number of groups at VLAN 10 was $n10, now is $n11, but 5 entries added on VLAN 10, $((n10 + 5)) expected"
+
+	((n21 == n20))
+	check_err $? "Number of groups at VLAN 20 was $n20, now is $n21, but no change expected on VLAN 20"
+
+	${CFG}_entries_add "$locus2" temp 5 112
+	check_err $? "Couldn't add MDB entries to VLAN 20"
+	local n12=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n22=$(bridge_port_vlan_ngroups_get "$locus2")
+
+	((n12 == n11))
+	check_err $? "Number of groups at VLAN 10 was $n11, now is $n12, but no change expected on VLAN 10"
+
+	((n22 == n21 + 5))
+	check_err $? "Number of groups at VLAN 20 was $n21, now is $n22, but 5 entries added on VLAN 20, $((n21 + 5)) expected"
+
+	${CFG}_entries_del "$locus1" temp 5 111
+	check_err $? "Couldn't delete MDB entries from VLAN 10"
+	${CFG}_entries_del "$locus2" temp 5 112
+	check_err $? "Couldn't delete MDB entries from VLAN 20"
+	local n13=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n23=$(bridge_port_vlan_ngroups_get "$locus2")
+
+	((n13 == n10))
+	check_err $? "Number of groups at VLAN 10 was $n10, now is $n13, but should be back to $n10"
+
+	((n23 == n20))
+	check_err $? "Number of groups at VLAN 20 was $n20, now is $n23, but should be back to $n20"
+
+	log_test "$CFG: port_vlan: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg4()
+{
+	test_ngroups_cross_vlan cfg4
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl4()
+{
+	test_ngroups_cross_vlan ctl4
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg6()
+{
+	test_ngroups_cross_vlan cfg6
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl6()
+{
+	test_ngroups_cross_vlan ctl6
+}
+
+test_maxgroups_zero()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+	local max
+
+	max=$(bridge_${context}_maxgroups_get "$locus")
+	((max == 0))
+	check_err $? "Max groups on $locus should be 0, but $max reported"
+
+	bridge_${context}_maxgroups_set "$locus" 100
+	check_err $? "Failed to set max to 100"
+	max=$(bridge_${context}_maxgroups_get "$locus")
+	((max == 100))
+	check_err $? "Max groups expected to be 100, but $max reported"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "Couldn't set maximum to 0"
+
+	# Test that setting 0 explicitly still serves as infinity.
+	${CFG}_entries_add "$locus" temp 5
+	check_err $? "Adding 5 MDB entries failed but should have passed"
+	${CFG}_entries_del "$locus" temp 5
+	check_err $? "Couldn't delete MDB entries"
+
+	log_test "$CFG: $context maxgroups: reporting and treatment of 0"
+}
+
+test_8021d_maxgroups_zero_cfg4()
+{
+	test_maxgroups_zero cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl4()
+{
+	test_maxgroups_zero ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_cfg6()
+{
+	test_maxgroups_zero cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl6()
+{
+	test_maxgroups_zero ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_zero_cfg4()
+{
+	test_maxgroups_zero cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl4()
+{
+	test_maxgroups_zero ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_cfg6()
+{
+	test_maxgroups_zero cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl6()
+{
+	test_maxgroups_zero ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg4()
+{
+	test_maxgroups_zero cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl4()
+{
+	test_maxgroups_zero ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg6()
+{
+	test_maxgroups_zero cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl6()
+{
+	test_maxgroups_zero ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_zero_cross_vlan()
+{
+	local CFG=$1; shift
+
+	local locus0="dev $swp1"
+	local locus1="dev $swp1 vid 10"
+	local locus2="dev $swp1 vid 20"
+	local max
+
+	RET=0
+
+	bridge_port_vlan_maxgroups_set "$locus1" 100
+	check_err $? "$locus1: Failed to set max to 100"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 0))
+	check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 0))
+	check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+	bridge_port_vlan_maxgroups_set "$locus2" 100
+	check_err $? "$locus2: Failed to set max to 100"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 0))
+	check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 100))
+	check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+	bridge_port_maxgroups_set "$locus0" 100
+	check_err $? "$locus0: Failed to set max to 100"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 100))
+	check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 100))
+	check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+	bridge_port_vlan_maxgroups_set "$locus1" 0
+	check_err $? "$locus1: Failed to set max to 0"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 100))
+	check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 100))
+	check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+	bridge_port_vlan_maxgroups_set "$locus2" 0
+	check_err $? "$locus2: Failed to set max to 0"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 100))
+	check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 0))
+	check_err $? "$locus2: Max groups expected to be 0 but $max reported"
+
+	bridge_port_maxgroups_set "$locus0" 0
+	check_err $? "$locus0: Failed to set max to 0"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 0))
+	check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 0))
+	check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+	log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN maximums"
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg4()
+{
+	test_maxgroups_zero_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl4()
+{
+	test_maxgroups_zero_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg6()
+{
+	test_maxgroups_zero_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl6()
+{
+	test_maxgroups_zero_cross_vlan ctl6
+}
+
+test_maxgroups_too_low()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+
+	local n=$(bridge_${context}_ngroups_get "$locus")
+	local msg
+
+	${CFG}_entries_add "$locus" temp 5 111
+	check_err $? "$locus: Couldn't add MDB entries"
+
+	bridge_${context}_maxgroups_set "$locus" $((n+2))
+	check_err $? "$locus: Setting maxgroups to $((n+2)) failed"
+
+	msg=$(${CFG}_entries_add "$locus" temp 2 112 2>&1)
+	check_fail $? "$locus: Adding more entries passed when max<n"
+	bridge_maxgroups_errmsg_check_cfg "$msg"
+
+	${CFG}_entries_del "$locus" temp 5 111
+	check_err $? "$locus: Couldn't delete MDB entries"
+
+	${CFG}_entries_add "$locus" temp 2 112
+	check_err $? "$locus: Adding more entries failed"
+
+	${CFG}_entries_del "$locus" temp 2 112
+	check_err $? "$locus: Deleting more entries failed"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "$locus: Couldn't set maximum to 0"
+
+	log_test "$CFG: $context maxgroups: configure below ngroups"
+}
+
+test_8021d_maxgroups_too_low_cfg4()
+{
+	test_maxgroups_too_low cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl4()
+{
+	test_maxgroups_too_low ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_cfg6()
+{
+	test_maxgroups_too_low cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl6()
+{
+	test_maxgroups_too_low ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_low_cfg4()
+{
+	test_maxgroups_too_low cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl4()
+{
+	test_maxgroups_too_low ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_cfg6()
+{
+	test_maxgroups_too_low cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl6()
+{
+	test_maxgroups_too_low ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg4()
+{
+	test_maxgroups_too_low cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl4()
+{
+	test_maxgroups_too_low ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg6()
+{
+	test_maxgroups_too_low cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl6()
+{
+	test_maxgroups_too_low ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_entries()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+
+	local n=$(bridge_${context}_ngroups_get "$locus")
+	local msg
+
+	# Configure a low maximum
+	bridge_${context}_maxgroups_set "$locus" $((n+1))
+	check_err $? "$locus: Couldn't set maximum"
+
+	# Try to add more entries than the configured maximum
+	msg=$(${CFG}_entries_add "$locus" temp 5 2>&1)
+	check_fail $? "Adding 5 MDB entries passed, but should have failed"
+	bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+	# When adding entries through the control path, as many as possible
+	# get created. That's consistent with the mcast_hash_max behavior.
+	# So there, drop the entries explicitly.
+	if [[ ${CFG%[46]} == ctl ]]; then
+		${CFG}_entries_del "$locus" temp 17 2>&1
+	fi
+
+	local n2=$(bridge_${context}_ngroups_get "$locus")
+	((n2 == n))
+	check_err $? "Number of groups was $n, but after a failed attempt to add MDB entries it changed to $n2"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "$locus: Couldn't set maximum to 0"
+
+	log_test "$CFG: $context maxgroups: add too many MDB entries"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg4()
+{
+	test_maxgroups_too_many_entries cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl4()
+{
+	test_maxgroups_too_many_entries ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg6()
+{
+	test_maxgroups_too_many_entries cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl6()
+{
+	test_maxgroups_too_many_entries ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg4()
+{
+	test_maxgroups_too_many_entries cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl4()
+{
+	test_maxgroups_too_many_entries ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg6()
+{
+	test_maxgroups_too_many_entries cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl6()
+{
+	test_maxgroups_too_many_entries ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg4()
+{
+	test_maxgroups_too_many_entries cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl4()
+{
+	test_maxgroups_too_many_entries ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg6()
+{
+	test_maxgroups_too_many_entries cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl6()
+{
+	test_maxgroups_too_many_entries ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_cross_vlan()
+{
+	local CFG=$1; shift
+
+	RET=0
+
+	local locus0="dev $swp1"
+	local locus1="dev $swp1 vid 10"
+	local locus2="dev $swp1 vid 20"
+	local n1=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n2=$(bridge_port_vlan_ngroups_get "$locus2")
+	local msg
+
+	if ((n1 > n2)); then
+		local tmp=$n1
+		n1=$n2
+		n2=$tmp
+
+		tmp="$locus1"
+		locus1="$locus2"
+		locus2="$tmp"
+	fi
+
+	# Now 0 <= n1 <= n2.
+	${CFG}_entries_add "$locus2" temp 5 112
+	check_err $? "Couldn't add 5 entries"
+
+	n2=$(bridge_port_vlan_ngroups_get "$locus2")
+	# Now 0 <= n1 < n2-1.
+
+	# Setting locus1'maxgroups to n2-1 should pass. The number is
+	# smaller than both the absolute number of MDB entries, and in
+	# particular than number of locus2's number of entries, but it is
+	# large enough to cover locus1's entries. Thus we check that
+	# individual VLAN's ngroups are independent.
+	bridge_port_vlan_maxgroups_set "$locus1" $((n2-1))
+	check_err $? "Setting ${locus1}'s maxgroups to $((n2-1)) failed"
+
+	msg=$(${CFG}_entries_add "$locus1" temp $n2 111 2>&1)
+	check_fail $? "$locus1: Adding $n2 MDB entries passed, but should have failed"
+	bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+	bridge_port_maxgroups_set "$locus0" $((n1 + n2 + 2))
+	check_err $? "$locus0: Couldn't set maximum"
+
+	msg=$(${CFG}_entries_add "$locus1" temp 5 111 2>&1)
+	check_fail $? "$locus1: Adding 5 MDB entries passed, but should have failed"
+	bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+	# IGMP/MLD packets can cause several entries to be added, before
+	# the maximum is hit and the rest is then bounced. Remove what was
+	# committed, if anything.
+	${CFG}_entries_del "$locus1" temp 5 111 2>/dev/null
+
+	${CFG}_entries_add "$locus1" temp 2 111
+	check_err $? "$locus1: Adding 2 MDB entries failed, but should have passed"
+
+	${CFG}_entries_del "$locus1" temp 2 111
+	check_err $? "Couldn't delete MDB entries"
+
+	${CFG}_entries_del "$locus2" temp 5 112
+	check_err $? "Couldn't delete MDB entries"
+
+	bridge_port_vlan_maxgroups_set "$locus1" 0
+	check_err $? "$locus1: Couldn't set maximum to 0"
+
+	bridge_port_maxgroups_set "$locus0" 0
+	check_err $? "$locus0: Couldn't set maximum to 0"
+
+	log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg4()
+{
+	test_maxgroups_too_many_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl4()
+{
+	test_maxgroups_too_many_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg6()
+{
+	test_maxgroups_too_many_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl6()
+{
+	test_maxgroups_too_many_cross_vlan ctl6
+}
+
+test_vlan_attributes()
+{
+	local locus=$1; shift
+	local expect=$1; shift
+
+	RET=0
+
+	local max=$(bridge_port_vlan_maxgroups_get "$locus")
+	local n=$(bridge_port_vlan_ngroups_get "$locus")
+
+	eval "[[ $max $expect ]]"
+	check_err $? "$locus: maxgroups attribute expected to be $expect, but was $max"
+
+	eval "[[ $n $expect ]]"
+	check_err $? "$locus: ngroups attribute expected to be $expect, but was $n"
+
+	log_test "port_vlan: presence of ngroups and maxgroups attributes"
+}
+
+test_8021q_vlan_attributes()
+{
+	test_vlan_attributes "dev $swp1 vid 10" "== null"
+}
+
+test_8021qvs_vlan_attributes()
+{
+	test_vlan_attributes "dev $swp1 vid 10" "-ge 0"
+}
+
+test_toggle_vlan_snooping()
+{
+	local mode=$1; shift
+
+	RET=0
+
+	local CFG=cfg4
+	local context=port_vlan
+	local locus="dev $swp1 vid 10"
+
+	${CFG}_entries_add "$locus" $mode 5
+	check_err $? "Couldn't add MDB entries"
+
+	bridge_${context}_maxgroups_set "$locus" 100
+	check_err $? "Failed to set max to 100"
+
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+	sleep 1
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+
+	local n=$(bridge_${context}_ngroups_get "$locus")
+	local nn=$(bridge mdb show dev br0 | grep $swp1 | wc -l)
+	((nn == n))
+	check_err $? "mcast_n_groups expected to be $nn, but $n reported"
+
+	local max=$(bridge_${context}_maxgroups_get "$locus")
+	((max == 100))
+	check_err $? "Max groups expected to be 100 but $max reported"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "Failed to set max to 0"
+
+	log_test "$CFG: $context: $mode: mcast_vlan_snooping toggle"
+}
+
+test_toggle_vlan_snooping_temp()
+{
+	test_toggle_vlan_snooping temp
+}
+
+test_toggle_vlan_snooping_permanent()
+{
+	test_toggle_vlan_snooping permanent
+}
+
+# ngroup test suites
+
+test_8021d_ngroups_cfg4()
+{
+	test_8021d_ngroups_reporting_cfg4
+}
+
+test_8021d_ngroups_ctl4()
+{
+	test_8021d_ngroups_reporting_ctl4
+}
+
+test_8021d_ngroups_cfg6()
+{
+	test_8021d_ngroups_reporting_cfg6
+}
+
+test_8021d_ngroups_ctl6()
+{
+	test_8021d_ngroups_reporting_ctl6
+}
+
+test_8021q_ngroups_cfg4()
+{
+	test_8021q_ngroups_reporting_cfg4
+}
+
+test_8021q_ngroups_ctl4()
+{
+	test_8021q_ngroups_reporting_ctl4
+}
+
+test_8021q_ngroups_cfg6()
+{
+	test_8021q_ngroups_reporting_cfg6
+}
+
+test_8021q_ngroups_ctl6()
+{
+	test_8021q_ngroups_reporting_ctl6
+}
+
+test_8021qvs_ngroups_cfg4()
+{
+	test_8021qvs_ngroups_reporting_cfg4
+	test_8021qvs_ngroups_cross_vlan_cfg4
+}
+
+test_8021qvs_ngroups_ctl4()
+{
+	test_8021qvs_ngroups_reporting_ctl4
+	test_8021qvs_ngroups_cross_vlan_ctl4
+}
+
+test_8021qvs_ngroups_cfg6()
+{
+	test_8021qvs_ngroups_reporting_cfg6
+	test_8021qvs_ngroups_cross_vlan_cfg6
+}
+
+test_8021qvs_ngroups_ctl6()
+{
+	test_8021qvs_ngroups_reporting_ctl6
+	test_8021qvs_ngroups_cross_vlan_ctl6
+}
+
+# maxgroups test suites
+
+test_8021d_maxgroups_cfg4()
+{
+	test_8021d_maxgroups_zero_cfg4
+	test_8021d_maxgroups_too_low_cfg4
+	test_8021d_maxgroups_too_many_entries_cfg4
+}
+
+test_8021d_maxgroups_ctl4()
+{
+	test_8021d_maxgroups_zero_ctl4
+	test_8021d_maxgroups_too_low_ctl4
+	test_8021d_maxgroups_too_many_entries_ctl4
+}
+
+test_8021d_maxgroups_cfg6()
+{
+	test_8021d_maxgroups_zero_cfg6
+	test_8021d_maxgroups_too_low_cfg6
+	test_8021d_maxgroups_too_many_entries_cfg6
+}
+
+test_8021d_maxgroups_ctl6()
+{
+	test_8021d_maxgroups_zero_ctl6
+	test_8021d_maxgroups_too_low_ctl6
+	test_8021d_maxgroups_too_many_entries_ctl6
+}
+
+test_8021q_maxgroups_cfg4()
+{
+	test_8021q_maxgroups_zero_cfg4
+	test_8021q_maxgroups_too_low_cfg4
+	test_8021q_maxgroups_too_many_entries_cfg4
+}
+
+test_8021q_maxgroups_ctl4()
+{
+	test_8021q_maxgroups_zero_ctl4
+	test_8021q_maxgroups_too_low_ctl4
+	test_8021q_maxgroups_too_many_entries_ctl4
+}
+
+test_8021q_maxgroups_cfg6()
+{
+	test_8021q_maxgroups_zero_cfg6
+	test_8021q_maxgroups_too_low_cfg6
+	test_8021q_maxgroups_too_many_entries_cfg6
+}
+
+test_8021q_maxgroups_ctl6()
+{
+	test_8021q_maxgroups_zero_ctl6
+	test_8021q_maxgroups_too_low_ctl6
+	test_8021q_maxgroups_too_many_entries_ctl6
+}
+
+test_8021qvs_maxgroups_cfg4()
+{
+	test_8021qvs_maxgroups_zero_cfg4
+	test_8021qvs_maxgroups_zero_cross_vlan_cfg4
+	test_8021qvs_maxgroups_too_low_cfg4
+	test_8021qvs_maxgroups_too_many_entries_cfg4
+	test_8021qvs_maxgroups_too_many_cross_vlan_cfg4
+}
+
+test_8021qvs_maxgroups_ctl4()
+{
+	test_8021qvs_maxgroups_zero_ctl4
+	test_8021qvs_maxgroups_zero_cross_vlan_ctl4
+	test_8021qvs_maxgroups_too_low_ctl4
+	test_8021qvs_maxgroups_too_many_entries_ctl4
+	test_8021qvs_maxgroups_too_many_cross_vlan_ctl4
+}
+
+test_8021qvs_maxgroups_cfg6()
+{
+	test_8021qvs_maxgroups_zero_cfg6
+	test_8021qvs_maxgroups_zero_cross_vlan_cfg6
+	test_8021qvs_maxgroups_too_low_cfg6
+	test_8021qvs_maxgroups_too_many_entries_cfg6
+	test_8021qvs_maxgroups_too_many_cross_vlan_cfg6
+}
+
+test_8021qvs_maxgroups_ctl6()
+{
+	test_8021qvs_maxgroups_zero_ctl6
+	test_8021qvs_maxgroups_zero_cross_vlan_ctl6
+	test_8021qvs_maxgroups_too_low_ctl6
+	test_8021qvs_maxgroups_too_many_entries_ctl6
+	test_8021qvs_maxgroups_too_many_cross_vlan_ctl6
+}
+
+# other test suites
+
+test_8021qvs_toggle_vlan_snooping()
+{
+	test_toggle_vlan_snooping_temp
+	test_toggle_vlan_snooping_permanent
+}
+
+# test groups
+
+test_8021d()
+{
+	# Tests for vlan_filtering 0 mcast_vlan_snooping 0.
+
+	switch_create_8021d
+	setup_wait
+
+	test_8021d_ngroups_cfg4
+	test_8021d_ngroups_ctl4
+	test_8021d_ngroups_cfg6
+	test_8021d_ngroups_ctl6
+	test_8021d_maxgroups_cfg4
+	test_8021d_maxgroups_ctl4
+	test_8021d_maxgroups_cfg6
+	test_8021d_maxgroups_ctl6
+
+	switch_destroy
+}
+
+test_8021q()
+{
+	# Tests for vlan_filtering 1 mcast_vlan_snooping 0.
+
+	switch_create_8021q
+	setup_wait
+
+	test_8021q_vlan_attributes
+	test_8021q_ngroups_cfg4
+	test_8021q_ngroups_ctl4
+	test_8021q_ngroups_cfg6
+	test_8021q_ngroups_ctl6
+	test_8021q_maxgroups_cfg4
+	test_8021q_maxgroups_ctl4
+	test_8021q_maxgroups_cfg6
+	test_8021q_maxgroups_ctl6
+
+	switch_destroy
+}
+
+test_8021qvs()
+{
+	# Tests for vlan_filtering 1 mcast_vlan_snooping 1.
+
+	switch_create_8021qvs
+	setup_wait
+
+	test_8021qvs_vlan_attributes
+	test_8021qvs_ngroups_cfg4
+	test_8021qvs_ngroups_ctl4
+	test_8021qvs_ngroups_cfg6
+	test_8021qvs_ngroups_ctl6
+	test_8021qvs_maxgroups_cfg4
+	test_8021qvs_maxgroups_ctl4
+	test_8021qvs_maxgroups_cfg6
+	test_8021qvs_maxgroups_ctl6
+	test_8021qvs_toggle_vlan_snooping
+
+	switch_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 8306829bf845186ec8c470c771243016c30c3d74 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 6 Feb 2023 09:22:29 +0000
Subject: selftests/bpf: Fix spelling mistake "detecion" -> "detection"

There is a spelling mistake in a literal string. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230206092229.46416-1-colin.i.king@gmail.com
---
 tools/testing/selftests/bpf/xdp_features.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
index 10fad1243573..fce12165213b 100644
--- a/tools/testing/selftests/bpf/xdp_features.c
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -57,7 +57,7 @@ static void sig_handler(int sig)
 
 const char *argp_program_version = "xdp-features 0.0";
 const char argp_program_doc[] =
-"XDP features detecion application.\n"
+"XDP features detection application.\n"
 "\n"
 "XDP features application checks the XDP advertised features match detected ones.\n"
 "\n"
-- 
cgit v1.2.3


From c12e0d5f267d7eb45a2f8eaa9fd44eaa2871a95e Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 7 Feb 2023 19:44:58 +0100
Subject: self-tests: introduce self-tests for RPS default mask

Ensure that RPS default mask changes take place on
all newly created netns/devices and don't affect
existing ones.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile            |  1 +
 tools/testing/selftests/net/config              |  3 ++
 tools/testing/selftests/net/rps_default_mask.sh | 57 +++++++++++++++++++++++++
 3 files changed, 61 insertions(+)
 create mode 100755 tools/testing/selftests/net/rps_default_mask.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 951bd5342bc6..3364c548a23b 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -46,6 +46,7 @@ TEST_PROGS += stress_reuseport_listen.sh
 TEST_PROGS += l2_tos_ttl_inherit.sh
 TEST_PROGS += bind_bhash.sh
 TEST_PROGS += ip_local_port_range.sh
+TEST_PROGS += rps_default_mask.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index bd89198cd817..cc9fd55ab869 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -3,6 +3,9 @@ CONFIG_NET_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
 CONFIG_NUMA=y
+CONFIG_RPS=y
+CONFIG_SYSFS=y
+CONFIG_PROC_SYSCTL=y
 CONFIG_NET_VRF=y
 CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_IPV6=y
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
new file mode 100755
index 000000000000..c81c0ac7ddfe
--- /dev/null
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly ksft_skip=4
+readonly cpus=$(nproc)
+ret=0
+
+[ $cpus -gt 2 ] || exit $ksft_skip
+
+readonly INITIAL_RPS_DEFAULT_MASK=$(cat /proc/sys/net/core/rps_default_mask)
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+	echo $INITIAL_RPS_DEFAULT_MASK > /proc/sys/net/core/rps_default_mask
+	ip netns del $NETNS
+}
+
+chk_rps() {
+	local rps_mask expected_rps_mask=$3
+	local dev_name=$2
+	local msg=$1
+
+	rps_mask=$(ip netns exec $NETNS cat /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
+	printf "%-60s" "$msg"
+	if [ $rps_mask -eq $expected_rps_mask ]; then
+		echo "[ ok ]"
+	else
+		echo "[fail] expected $expected_rps_mask found $rps_mask"
+		ret=1
+	fi
+}
+
+trap cleanup EXIT
+
+echo 0 > /proc/sys/net/core/rps_default_mask
+setup
+chk_rps "empty rps_default_mask" lo 0
+cleanup
+
+echo 1 > /proc/sys/net/core/rps_default_mask
+setup
+chk_rps "non zero rps_default_mask" lo 1
+
+echo 3 > /proc/sys/net/core/rps_default_mask
+chk_rps "changing rps_default_mask dont affect existing netns" lo 1
+
+ip -n $NETNS link add type veth
+ip -n $NETNS link set dev veth0 up
+ip -n $NETNS link set dev veth1 up
+chk_rps "changing rps_default_mask affect newly created devices" veth0 3
+chk_rps "changing rps_default_mask affect newly created devices[II]" veth1 3
+exit $ret
-- 
cgit v1.2.3


From 795deb3f97472942780283e2af8f38625e3329aa Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 10 Feb 2023 01:11:55 +0100
Subject: selftests/bpf: Quote host tools

Using HOSTCC="ccache clang" breaks building the tests, since, when it's
forwarded to e.g. bpftool, the child make sees HOSTCC=ccache and
"clang" is considered a target. Fix by quoting it, and also HOSTLD and
HOSTAR for consistency.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230210001210.395194-2-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b2eb3201b85a..49bc1ba12d3a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -248,7 +248,7 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL)
 $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
 	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)			       \
-		    ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) 	       \
+		    ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" 	       \
 		    EXTRA_CFLAGS='-g -O0'				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
 		    LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/		       \
@@ -280,7 +280,8 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 		| $(HOST_BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR)                             \
 		    EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE=		       \
-		    OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
+		    OUTPUT=$(HOST_BUILD_DIR)/libbpf/			       \
+		    CC="$(HOSTCC)" LD="$(HOSTLD)"			       \
 		    DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
 endif
 
@@ -301,7 +302,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids	\
 		       $(TOOLSDIR)/lib/ctype.c			\
 		       $(TOOLSDIR)/lib/str_error_r.c
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids	\
-		CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \
+		CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \
 		LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \
 		OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
 
-- 
cgit v1.2.3


From 0589d16475ae69da30dde8d0064b3b834ee25310 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 10 Feb 2023 01:11:57 +0100
Subject: selftests/bpf: Split SAN_CFLAGS and SAN_LDFLAGS

Memory Sanitizer requires passing different options to CFLAGS and
LDFLAGS: besides the mandatory -fsanitize=memory, one needs to pass
header and library paths, and passing -L to a compilation step
triggers -Wunused-command-line-argument. So introduce a separate
variable for linker flags. Use $(SAN_CFLAGS) as a default in order to
avoid complicating the ASan usage.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230210001210.395194-4-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 49bc1ba12d3a..9b5786ac676e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -22,10 +22,11 @@ endif
 
 BPF_GCC		?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS	?=
+SAN_LDFLAGS	?= $(SAN_CFLAGS)
 CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS)	\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
 	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
-LDFLAGS += $(SAN_CFLAGS)
+LDFLAGS += $(SAN_LDFLAGS)
 LDLIBS += -lelf -lz -lrt -lpthread
 
 # Silence some warnings when compiled with clang
-- 
cgit v1.2.3


From 24a87b477c65c33841c0511d268f46a226271502 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 10 Feb 2023 01:11:58 +0100
Subject: selftests/bpf: Forward SAN_CFLAGS and SAN_LDFLAGS to runqslower and
 libbpf

To get useful results from the Memory Sanitizer, all code running in a
process needs to be instrumented. When building tests with other
sanitizers, it's not strictly necessary, but is also helpful.
So make sure runqslower and libbpf are compiled with SAN_CFLAGS and
linked with SAN_LDFLAGS.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230210001210.395194-5-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/Makefile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 9b5786ac676e..c4b5c44cdee2 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -215,7 +215,9 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
 		    OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF)     \
 		    BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/		       \
 		    BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf			       \
-		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&	       \
+		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)		       \
+		    EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)'			       \
+		    EXTRA_LDFLAGS='$(SAN_LDFLAGS)' &&			       \
 		    cp $(RUNQSLOWER_OUTPUT)runqslower $@
 
 TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
@@ -272,7 +274,8 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   $(APIDIR)/linux/bpf.h					       \
 	   | $(BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
-		    EXTRA_CFLAGS='-g -O0'				       \
+		    EXTRA_CFLAGS='-g -O0 $(SAN_CFLAGS)'			       \
+		    EXTRA_LDFLAGS='$(SAN_LDFLAGS)'			       \
 		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
 
 ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
-- 
cgit v1.2.3


From 907300c7a66b3b58fc0039402aa14e0b9f11aad6 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 10 Feb 2023 01:11:59 +0100
Subject: selftests/bpf: Attach to fopen()/fclose() in uprobe_autoattach

malloc() and free() may be completely replaced by sanitizers, use
fopen() and fclose() instead.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230210001210.395194-6-iii@linux.ibm.com
---
 .../testing/selftests/bpf/prog_tests/uprobe_autoattach.c | 14 ++++++++------
 .../testing/selftests/bpf/progs/test_uprobe_autoattach.c | 16 ++++++++--------
 2 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
index 82807def0d24..6558c857e620 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -16,10 +16,10 @@ static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3,
 
 void test_uprobe_autoattach(void)
 {
+	const char *devnull_str = "/dev/null";
 	struct test_uprobe_autoattach *skel;
 	int trigger_ret;
-	size_t malloc_sz = 1;
-	char *mem;
+	FILE *devnull;
 
 	skel = test_uprobe_autoattach__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -36,16 +36,18 @@ void test_uprobe_autoattach(void)
 	skel->bss->test_pid = getpid();
 
 	/* trigger & validate shared library u[ret]probes attached by name */
-	mem = malloc(malloc_sz);
+	devnull = fopen(devnull_str, "r");
 
 	ASSERT_EQ(skel->bss->uprobe_byname_parm1, 1, "check_uprobe_byname_parm1");
 	ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
 	ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
 	ASSERT_EQ(skel->bss->uretprobe_byname_ret, trigger_ret, "check_uretprobe_byname_ret");
 	ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
-	ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1");
+	ASSERT_EQ(skel->bss->uprobe_byname2_parm1, (__u64)(long)devnull_str,
+		  "check_uprobe_byname2_parm1");
 	ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
-	ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_rc, (__u64)(long)devnull,
+		  "check_uretprobe_byname2_rc");
 	ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
 
 	ASSERT_EQ(skel->bss->a[0], 1, "arg1");
@@ -67,7 +69,7 @@ void test_uprobe_autoattach(void)
 	ASSERT_EQ(skel->bss->a[7], 8, "arg8");
 #endif
 
-	free(mem);
+	fclose(devnull);
 cleanup:
 	test_uprobe_autoattach__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
index 774ddeb45898..da4bf89d004c 100644
--- a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -13,9 +13,9 @@ int uprobe_byname_ran = 0;
 int uretprobe_byname_rc = 0;
 int uretprobe_byname_ret = 0;
 int uretprobe_byname_ran = 0;
-size_t uprobe_byname2_parm1 = 0;
+u64 uprobe_byname2_parm1 = 0;
 int uprobe_byname2_ran = 0;
-char *uretprobe_byname2_rc = NULL;
+u64 uretprobe_byname2_rc = 0;
 int uretprobe_byname2_ran = 0;
 
 int test_pid;
@@ -88,28 +88,28 @@ int BPF_URETPROBE(handle_uretprobe_byname, int ret)
 }
 
 
-SEC("uprobe/libc.so.6:malloc")
-int handle_uprobe_byname2(struct pt_regs *ctx)
+SEC("uprobe/libc.so.6:fopen")
+int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode)
 {
 	int pid = bpf_get_current_pid_tgid() >> 32;
 
 	/* ignore irrelevant invocations */
 	if (test_pid != pid)
 		return 0;
-	uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx);
+	uprobe_byname2_parm1 = (u64)(long)pathname;
 	uprobe_byname2_ran = 3;
 	return 0;
 }
 
-SEC("uretprobe/libc.so.6:malloc")
-int handle_uretprobe_byname2(struct pt_regs *ctx)
+SEC("uretprobe/libc.so.6:fopen")
+int BPF_URETPROBE(handle_uretprobe_byname2, void *ret)
 {
 	int pid = bpf_get_current_pid_tgid() >> 32;
 
 	/* ignore irrelevant invocations */
 	if (test_pid != pid)
 		return 0;
-	uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx);
+	uretprobe_byname2_rc = (u64)(long)ret;
 	uretprobe_byname2_ran = 4;
 	return 0;
 }
-- 
cgit v1.2.3


From 202702e890a41412a7de970b84a970ba1d5001c9 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 10 Feb 2023 01:12:00 +0100
Subject: selftests/bpf: Attach to fopen()/fclose() in attach_probe

malloc() and free() may be completely replaced by sanitizers, use
fopen() and fclose() instead.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230210001210.395194-7-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/prog_tests/attach_probe.c | 10 +++++-----
 tools/testing/selftests/bpf/progs/test_attach_probe.c | 11 ++++++-----
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 9566d9d2f6ee..56374c8b5436 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -33,8 +33,8 @@ void test_attach_probe(void)
 	struct test_attach_probe* skel;
 	ssize_t uprobe_offset, ref_ctr_offset;
 	struct bpf_link *uprobe_err_link;
+	FILE *devnull;
 	bool legacy;
-	char *mem;
 
 	/* Check if new-style kprobe/uprobe API is supported.
 	 * Kernels that support new FD-based kprobe and uprobe BPF attachment
@@ -147,7 +147,7 @@ void test_attach_probe(void)
 	/* test attach by name for a library function, using the library
 	 * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
 	 */
-	uprobe_opts.func_name = "malloc";
+	uprobe_opts.func_name = "fopen";
 	uprobe_opts.retprobe = false;
 	skel->links.handle_uprobe_byname2 =
 			bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2,
@@ -157,7 +157,7 @@ void test_attach_probe(void)
 	if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
 		goto cleanup;
 
-	uprobe_opts.func_name = "free";
+	uprobe_opts.func_name = "fclose";
 	uprobe_opts.retprobe = true;
 	skel->links.handle_uretprobe_byname2 =
 			bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2,
@@ -195,8 +195,8 @@ void test_attach_probe(void)
 	usleep(1);
 
 	/* trigger & validate shared library u[ret]probes attached by name */
-	mem = malloc(1);
-	free(mem);
+	devnull = fopen("/dev/null", "r");
+	fclose(devnull);
 
 	/* trigger & validate uprobe & uretprobe */
 	trigger_func();
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index a1e45fec8938..3b5dc34d23e9 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -92,18 +92,19 @@ int handle_uretprobe_byname(struct pt_regs *ctx)
 }
 
 SEC("uprobe")
-int handle_uprobe_byname2(struct pt_regs *ctx)
+int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode)
 {
-	unsigned int size = PT_REGS_PARM1(ctx);
+	char mode_buf[2] = {};
 
-	/* verify malloc size */
-	if (size == 1)
+	/* verify fopen mode */
+	bpf_probe_read_user(mode_buf, sizeof(mode_buf), mode);
+	if (mode_buf[0] == 'r' && mode_buf[1] == 0)
 		uprobe_byname2_res = 7;
 	return 0;
 }
 
 SEC("uretprobe")
-int handle_uretprobe_byname2(struct pt_regs *ctx)
+int BPF_URETPROBE(handle_uretprobe_byname2, void *ret)
 {
 	uretprobe_byname2_res = 8;
 	return 0;
-- 
cgit v1.2.3


From 049139126ec7ab55b568b2935aa7db5e6657258c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 9 Feb 2023 09:18:52 +0200
Subject: selftests: forwarding: Add MDB dump test cases

The kernel maintains three markers for the MDB dump:

1. The last bridge device from which the MDB was dumped.
2. The last MDB entry from which the MDB was dumped.
3. The last port-group entry that was dumped.

Add test cases for large scale MDB dump to make sure that all the
configured entries are dumped and that the markers are used correctly.

Specifically, create 2 bridges with 32 ports and add 256 MDB entries in
which all the ports are member of. Test that each bridge reports 8192
(256 * 32) permanent entries. Do that with IPv4, IPv6 and L2 MDB
entries.

On my system, MDB dump of the above is contained in about 50 netlink
messages.

Example output:

 # ./bridge_mdb.sh
 [...]
 INFO: # Large scale dump tests
 TEST: IPv4 large scale dump tests                                   [ OK ]
 TEST: IPv6 large scale dump tests                                   [ OK ]
 TEST: L2 large scale dump tests                                     [ OK ]
 [...]

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mdb.sh | 99 ++++++++++++++++++++++
 1 file changed, 99 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index b48867d8cadf..ae3f9462a2b6 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -742,10 +742,109 @@ cfg_test_port()
 	cfg_test_port_l2
 }
 
+ipv4_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "239.1.1.$i"
+	done
+}
+
+ipv6_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "ff0e::$(printf %x $i)"
+	done
+}
+
+l2_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "01:00:00:00:00:$(printf %02x $i)"
+	done
+}
+
+cfg_test_dump_common()
+{
+	local name=$1; shift
+	local fn=$1; shift
+	local max_bridges=2
+	local max_grps=256
+	local max_ports=32
+	local num_entries
+	local batch_file
+	local grp
+	local i j
+
+	RET=0
+
+	# Create net devices.
+	for i in $(seq 1 $max_bridges); do
+		ip link add name br-test${i} up type bridge vlan_filtering 1 \
+			mcast_snooping 1
+		for j in $(seq 1 $max_ports); do
+			ip link add name br-test${i}-du${j} up \
+				master br-test${i} type dummy
+		done
+	done
+
+	# Create batch file with MDB entries.
+	batch_file=$(mktemp)
+	for i in $(seq 1 $max_bridges); do
+		for j in $(seq 1 $max_ports); do
+			for grp in $($fn $max_grps); do
+				echo "mdb add dev br-test${i} \
+					port br-test${i}-du${j} grp $grp \
+					permanent vid 1" >> $batch_file
+			done
+		done
+	done
+
+	# Program the batch file and check for expected number of entries.
+	bridge -b $batch_file
+	for i in $(seq 1 $max_bridges); do
+		num_entries=$(bridge mdb show dev br-test${i} | \
+			grep "permanent" | wc -l)
+		[[ $num_entries -eq $((max_grps * max_ports)) ]]
+		check_err $? "Wrong number of entries in br-test${i}"
+	done
+
+	# Cleanup.
+	rm $batch_file
+	for i in $(seq 1 $max_bridges); do
+		ip link del dev br-test${i}
+		for j in $(seq $max_ports); do
+			ip link del dev br-test${i}-du${j}
+		done
+	done
+
+	log_test "$name large scale dump tests"
+}
+
+# Check large scale dump.
+cfg_test_dump()
+{
+	echo
+	log_info "# Large scale dump tests"
+
+	cfg_test_dump_common "IPv4" ipv4_grps_get
+	cfg_test_dump_common "IPv6" ipv6_grps_get
+	cfg_test_dump_common "L2" l2_grps_get
+}
+
 cfg_test()
 {
 	cfg_test_host
 	cfg_test_port
+	cfg_test_dump
 }
 
 __fwd_test_host_ip()
-- 
cgit v1.2.3


From 0b0757244754ea1d0721195c824770f5576e119e Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Thu, 9 Feb 2023 00:12:11 +0100
Subject: selftests/bpf: Fix out-of-srctree build

Building BPF selftests out of srctree fails with:

  make: *** No rule to make target '/linux-build//ima_setup.sh', needed by 'ima_setup.sh'.  Stop.

The culprit is the rule that defines convenient shorthands like
"make test_progs", which builds $(OUTPUT)/test_progs. These shorthands
make sense only for binaries that are built though; scripts that live
in the source tree do not end up in $(OUTPUT).

Therefore drop $(TEST_PROGS) and $(TEST_PROGS_EXTENDED) from the rule.

The issue exists for a while, but it became a problem only after commit
d68ae4982cb7 ("selftests/bpf: Install all required files to run selftests"),
which added dependencies on these scripts.

Fixes: 03dcb78460c2 ("selftests/bpf: Add simple per-test targets to Makefile")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230208231211.283606-1-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/Makefile | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c4b5c44cdee2..f7771592a920 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -152,8 +152,6 @@ endif
 # NOTE: Semicolon at the end is critical to override lib.mk's default static
 # rule for binaries.
 $(notdir $(TEST_GEN_PROGS)						\
-	 $(TEST_PROGS)							\
-	 $(TEST_PROGS_EXTENDED)						\
 	 $(TEST_GEN_PROGS_EXTENDED)					\
 	 $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
 
-- 
cgit v1.2.3


From 6a3cd3318ff65622415e34e8ee39d76331e7c869 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Sun, 12 Feb 2023 01:27:07 -0800
Subject: bpf: Migrate release_on_unlock logic to non-owning ref semantics

This patch introduces non-owning reference semantics to the verifier,
specifically linked_list API kfunc handling. release_on_unlock logic for
refs is refactored - with small functional changes - to implement these
semantics, and bpf_list_push_{front,back} are migrated to use them.

When a list node is pushed to a list, the program still has a pointer to
the node:

  n = bpf_obj_new(typeof(*n));

  bpf_spin_lock(&l);
  bpf_list_push_back(&l, n);
  /* n still points to the just-added node */
  bpf_spin_unlock(&l);

What the verifier considers n to be after the push, and thus what can be
done with n, are changed by this patch.

Common properties both before/after this patch:
  * After push, n is only a valid reference to the node until end of
    critical section
  * After push, n cannot be pushed to any list
  * After push, the program can read the node's fields using n

Before:
  * After push, n retains the ref_obj_id which it received on
    bpf_obj_new, but the associated bpf_reference_state's
    release_on_unlock field is set to true
    * release_on_unlock field and associated logic is used to implement
      "n is only a valid ref until end of critical section"
  * After push, n cannot be written to, the node must be removed from
    the list before writing to its fields
  * After push, n is marked PTR_UNTRUSTED

After:
  * After push, n's ref is released and ref_obj_id set to 0. NON_OWN_REF
    type flag is added to reg's type, indicating that it's a non-owning
    reference.
    * NON_OWN_REF flag and logic is used to implement "n is only a
      valid ref until end of critical section"
  * n can be written to (except for special fields e.g. bpf_list_node,
    timer, ...)

Summary of specific implementation changes to achieve the above:

  * release_on_unlock field, ref_set_release_on_unlock helper, and logic
    to "release on unlock" based on that field are removed

  * The anonymous active_lock struct used by bpf_verifier_state is
    pulled out into a named struct bpf_active_lock.

  * NON_OWN_REF type flag is introduced along with verifier logic
    changes to handle non-owning refs

  * Helpers are added to use NON_OWN_REF flag to implement non-owning
    ref semantics as described above
    * invalidate_non_owning_refs - helper to clobber all non-owning refs
      matching a particular bpf_active_lock identity. Replaces
      release_on_unlock logic in process_spin_lock.
    * ref_set_non_owning - set NON_OWN_REF type flag after doing some
      sanity checking
    * ref_convert_owning_non_owning - convert owning reference w/
      specified ref_obj_id to non-owning references. Set NON_OWN_REF
      flag for each reg with that ref_obj_id and 0-out its ref_obj_id

  * Update linked_list selftests to account for minor semantic
    differences introduced by this patch
    * Writes to a release_on_unlock node ref are not allowed, while
      writes to non-owning reference pointees are. As a result the
      linked_list "write after push" failure tests are no longer scenarios
      that should fail.
    * The test##missing_lock##op and test##incorrect_lock##op
      macro-generated failure tests need to have a valid node argument in
      order to have the same error output as before. Otherwise
      verification will fail early and the expected error output won't be seen.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230212092715.1422619-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h                                |   6 +
 include/linux/bpf_verifier.h                       |  38 +++--
 kernel/bpf/verifier.c                              | 168 +++++++++++++++------
 .../testing/selftests/bpf/prog_tests/linked_list.c |   2 -
 tools/testing/selftests/bpf/progs/linked_list.c    |   2 +-
 .../testing/selftests/bpf/progs/linked_list_fail.c | 100 +++++++-----
 6 files changed, 206 insertions(+), 110 deletions(-)

(limited to 'tools/testing')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4385418118f6..8b5d0b4c4ada 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -181,6 +181,7 @@ enum btf_field_type {
 	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
 	BPF_LIST_HEAD  = (1 << 4),
 	BPF_LIST_NODE  = (1 << 5),
+	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD,
 };
 
 struct btf_field_kptr {
@@ -576,6 +577,11 @@ enum bpf_type_flag {
 	/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
 	MEM_RCU			= BIT(13 + BPF_BASE_TYPE_BITS),
 
+	/* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning.
+	 * Currently only valid for linked-list and rbtree nodes.
+	 */
+	NON_OWN_REF		= BIT(14 + BPF_BASE_TYPE_BITS),
+
 	__BPF_TYPE_FLAG_MAX,
 	__BPF_TYPE_LAST_FLAG	= __BPF_TYPE_FLAG_MAX - 1,
 };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index aa83de1fe755..cf1bb1cf4a7b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -43,6 +43,22 @@ enum bpf_reg_liveness {
 	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
 };
 
+/* For every reg representing a map value or allocated object pointer,
+ * we consider the tuple of (ptr, id) for them to be unique in verifier
+ * context and conside them to not alias each other for the purposes of
+ * tracking lock state.
+ */
+struct bpf_active_lock {
+	/* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
+	 * there's no active lock held, and other fields have no
+	 * meaning. If non-NULL, it indicates that a lock is held and
+	 * id member has the reg->id of the register which can be >= 0.
+	 */
+	void *ptr;
+	/* This will be reg->id */
+	u32 id;
+};
+
 struct bpf_reg_state {
 	/* Ordering of fields matters.  See states_equal() */
 	enum bpf_reg_type type;
@@ -226,11 +242,6 @@ struct bpf_reference_state {
 	 * exiting a callback function.
 	 */
 	int callback_ref;
-	/* Mark the reference state to release the registers sharing the same id
-	 * on bpf_spin_unlock (for nodes that we will lose ownership to but are
-	 * safe to access inside the critical section).
-	 */
-	bool release_on_unlock;
 };
 
 /* state of the program:
@@ -331,21 +342,8 @@ struct bpf_verifier_state {
 	u32 branches;
 	u32 insn_idx;
 	u32 curframe;
-	/* For every reg representing a map value or allocated object pointer,
-	 * we consider the tuple of (ptr, id) for them to be unique in verifier
-	 * context and conside them to not alias each other for the purposes of
-	 * tracking lock state.
-	 */
-	struct {
-		/* This can either be reg->map_ptr or reg->btf. If ptr is NULL,
-		 * there's no active lock held, and other fields have no
-		 * meaning. If non-NULL, it indicates that a lock is held and
-		 * id member has the reg->id of the register which can be >= 0.
-		 */
-		void *ptr;
-		/* This will be reg->id */
-		u32 id;
-	} active_lock;
+
+	struct bpf_active_lock active_lock;
 	bool speculative;
 	bool active_rcu_lock;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 388245e8826e..f176bc15c879 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -190,6 +190,9 @@ struct bpf_verifier_stack_elem {
 
 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
+static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
+static int ref_set_non_owning(struct bpf_verifier_env *env,
+			      struct bpf_reg_state *reg);
 
 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 {
@@ -457,6 +460,11 @@ static bool type_is_ptr_alloc_obj(u32 type)
 	return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
 }
 
+static bool type_is_non_owning_ref(u32 type)
+{
+	return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
+}
+
 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
 {
 	struct btf_record *rec = NULL;
@@ -1073,6 +1081,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 				verbose_a("id=%d", reg->id);
 			if (reg->ref_obj_id)
 				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
+			if (type_is_non_owning_ref(reg->type))
+				verbose_a("%s", "non_own_ref");
 			if (t != SCALAR_VALUE)
 				verbose_a("off=%d", reg->off);
 			if (type_is_pkt_pointer(t))
@@ -5052,7 +5062,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 			return -EACCES;
 		}
 
-		if (type_is_alloc(reg->type) && !reg->ref_obj_id) {
+		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
+		    !reg->ref_obj_id) {
 			verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
 			return -EFAULT;
 		}
@@ -6042,9 +6053,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
 			cur->active_lock.ptr = btf;
 		cur->active_lock.id = reg->id;
 	} else {
-		struct bpf_func_state *fstate = cur_func(env);
 		void *ptr;
-		int i;
 
 		if (map)
 			ptr = map;
@@ -6060,25 +6069,11 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
 			verbose(env, "bpf_spin_unlock of different lock\n");
 			return -EINVAL;
 		}
-		cur->active_lock.ptr = NULL;
-		cur->active_lock.id = 0;
 
-		for (i = fstate->acquired_refs - 1; i >= 0; i--) {
-			int err;
+		invalidate_non_owning_refs(env);
 
-			/* Complain on error because this reference state cannot
-			 * be freed before this point, as bpf_spin_lock critical
-			 * section does not allow functions that release the
-			 * allocated object immediately.
-			 */
-			if (!fstate->refs[i].release_on_unlock)
-				continue;
-			err = release_reference(env, fstate->refs[i].id);
-			if (err) {
-				verbose(env, "failed to release release_on_unlock reference");
-				return err;
-			}
-		}
+		cur->active_lock.ptr = NULL;
+		cur->active_lock.id = 0;
 	}
 	return 0;
 }
@@ -6546,6 +6541,23 @@ found:
 	return 0;
 }
 
+static struct btf_field *
+reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
+{
+	struct btf_field *field;
+	struct btf_record *rec;
+
+	rec = reg_btf_record(reg);
+	if (!rec)
+		return NULL;
+
+	field = btf_record_find(rec, off, fields);
+	if (!field)
+		return NULL;
+
+	return field;
+}
+
 int check_func_arg_reg_off(struct bpf_verifier_env *env,
 			   const struct bpf_reg_state *reg, int regno,
 			   enum bpf_arg_type arg_type)
@@ -6567,6 +6579,18 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 		 */
 		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
 			return 0;
+
+		if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) {
+			if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT))
+				return __check_ptr_off_reg(env, reg, regno, true);
+
+			verbose(env, "R%d must have zero offset when passed to release func\n",
+				regno);
+			verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
+				kernel_type_name(reg->btf, reg->btf_id), reg->off);
+			return -EINVAL;
+		}
+
 		/* Doing check_ptr_off_reg check for the offset will catch this
 		 * because fixed_off_ok is false, but checking here allows us
 		 * to give the user a better error message.
@@ -6601,6 +6625,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	case PTR_TO_BTF_ID | PTR_TRUSTED:
 	case PTR_TO_BTF_ID | MEM_RCU:
 	case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
+	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
 		/* When referenced PTR_TO_BTF_ID is passed to release function,
 		 * its fixed offset must be 0. In the other cases, fixed offset
 		 * can be non-zero. This was already checked above. So pass
@@ -7363,6 +7388,17 @@ static int release_reference(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
+{
+	struct bpf_func_state *unused;
+	struct bpf_reg_state *reg;
+
+	bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
+		if (type_is_non_owning_ref(reg->type))
+			__mark_reg_unknown(env, reg);
+	}));
+}
+
 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
 				    struct bpf_reg_state *regs)
 {
@@ -8915,38 +8951,54 @@ static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
 	return 0;
 }
 
-static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
+static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-	struct bpf_func_state *state = cur_func(env);
+	struct bpf_verifier_state *state = env->cur_state;
+
+	if (!state->active_lock.ptr) {
+		verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
+		return -EFAULT;
+	}
+
+	if (type_flag(reg->type) & NON_OWN_REF) {
+		verbose(env, "verifier internal error: NON_OWN_REF already set\n");
+		return -EFAULT;
+	}
+
+	reg->type |= NON_OWN_REF;
+	return 0;
+}
+
+static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
+{
+	struct bpf_func_state *state, *unused;
 	struct bpf_reg_state *reg;
 	int i;
 
-	/* bpf_spin_lock only allows calling list_push and list_pop, no BPF
-	 * subprogs, no global functions. This means that the references would
-	 * not be released inside the critical section but they may be added to
-	 * the reference state, and the acquired_refs are never copied out for a
-	 * different frame as BPF to BPF calls don't work in bpf_spin_lock
-	 * critical sections.
-	 */
+	state = cur_func(env);
+
 	if (!ref_obj_id) {
-		verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n");
+		verbose(env, "verifier internal error: ref_obj_id is zero for "
+			     "owning -> non-owning conversion\n");
 		return -EFAULT;
 	}
+
 	for (i = 0; i < state->acquired_refs; i++) {
-		if (state->refs[i].id == ref_obj_id) {
-			if (state->refs[i].release_on_unlock) {
-				verbose(env, "verifier internal error: expected false release_on_unlock");
-				return -EFAULT;
+		if (state->refs[i].id != ref_obj_id)
+			continue;
+
+		/* Clear ref_obj_id here so release_reference doesn't clobber
+		 * the whole reg
+		 */
+		bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
+			if (reg->ref_obj_id == ref_obj_id) {
+				reg->ref_obj_id = 0;
+				ref_set_non_owning(env, reg);
 			}
-			state->refs[i].release_on_unlock = true;
-			/* Now mark everyone sharing same ref_obj_id as untrusted */
-			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
-				if (reg->ref_obj_id == ref_obj_id)
-					reg->type |= PTR_UNTRUSTED;
-			}));
-			return 0;
-		}
+		}));
+		return 0;
 	}
+
 	verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
 	return -EFAULT;
 }
@@ -9081,7 +9133,6 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
 {
 	const struct btf_type *et, *t;
 	struct btf_field *field;
-	struct btf_record *rec;
 	u32 list_node_off;
 
 	if (meta->btf != btf_vmlinux ||
@@ -9098,9 +9149,8 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	rec = reg_btf_record(reg);
 	list_node_off = reg->off + reg->var_off.value;
-	field = btf_record_find(rec, list_node_off, BPF_LIST_NODE);
+	field = reg_find_field_offset(reg, list_node_off, BPF_LIST_NODE);
 	if (!field || field->offset != list_node_off) {
 		verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
 		return -EINVAL;
@@ -9126,8 +9176,8 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
 			btf_name_by_offset(field->graph_root.btf, et->name_off));
 		return -EINVAL;
 	}
-	/* Set arg#1 for expiration after unlock */
-	return ref_set_release_on_unlock(env, reg->ref_obj_id);
+
+	return 0;
 }
 
 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
@@ -9406,11 +9456,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			    int *insn_idx_p)
 {
 	const struct btf_type *t, *func, *func_proto, *ptr_type;
+	u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id;
 	struct bpf_reg_state *regs = cur_regs(env);
 	const char *func_name, *ptr_type_name;
 	bool sleepable, rcu_lock, rcu_unlock;
 	struct bpf_kfunc_call_arg_meta meta;
-	u32 i, nargs, func_id, ptr_type_id;
 	int err, insn_idx = *insn_idx_p;
 	const struct btf_param *args;
 	const struct btf_type *ret_t;
@@ -9505,6 +9555,24 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
+	if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
+	    meta.func_id == special_kfunc_list[KF_bpf_list_push_back]) {
+		release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
+		err = ref_convert_owning_non_owning(env, release_ref_obj_id);
+		if (err) {
+			verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
+				func_name, func_id);
+			return err;
+		}
+
+		err = release_reference(env, release_ref_obj_id);
+		if (err) {
+			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+				func_name, func_id);
+			return err;
+		}
+	}
+
 	for (i = 0; i < CALLER_SAVED_REGS; i++)
 		mark_reg_not_init(env, regs, caller_saved[i]);
 
@@ -11825,8 +11893,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 		 */
 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
 			return;
-		if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off))
+		if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
+		    WARN_ON_ONCE(reg->off))
 			return;
+
 		if (is_null) {
 			reg->type = SCALAR_VALUE;
 			/* We don't need id and ref_obj_id from this point
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 9a7d4c47af63..2592b8aa5e41 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -78,8 +78,6 @@ static struct {
 	{ "direct_write_head", "direct access to bpf_list_head is disallowed" },
 	{ "direct_read_node", "direct access to bpf_list_node is disallowed" },
 	{ "direct_write_node", "direct access to bpf_list_node is disallowed" },
-	{ "write_after_push_front", "only read is supported" },
-	{ "write_after_push_back", "only read is supported" },
 	{ "use_after_unlock_push_front", "invalid mem access 'scalar'" },
 	{ "use_after_unlock_push_back", "invalid mem access 'scalar'" },
 	{ "double_push_front", "arg#1 expected pointer to allocated object" },
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
index 4ad88da5cda2..4fa4a9b01bde 100644
--- a/tools/testing/selftests/bpf/progs/linked_list.c
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -260,7 +260,7 @@ int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head
 {
 	int ret;
 
-	ret = list_push_pop_multiple(lock ,head, false);
+	ret = list_push_pop_multiple(lock, head, false);
 	if (ret)
 		return ret;
 	return list_push_pop_multiple(lock, head, true);
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index 1d9017240e19..69cdc07cba13 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -54,28 +54,44 @@
 		return 0;                                   \
 	}
 
-CHECK(kptr, push_front, &f->head);
-CHECK(kptr, push_back, &f->head);
 CHECK(kptr, pop_front, &f->head);
 CHECK(kptr, pop_back, &f->head);
 
-CHECK(global, push_front, &ghead);
-CHECK(global, push_back, &ghead);
 CHECK(global, pop_front, &ghead);
 CHECK(global, pop_back, &ghead);
 
-CHECK(map, push_front, &v->head);
-CHECK(map, push_back, &v->head);
 CHECK(map, pop_front, &v->head);
 CHECK(map, pop_back, &v->head);
 
-CHECK(inner_map, push_front, &iv->head);
-CHECK(inner_map, push_back, &iv->head);
 CHECK(inner_map, pop_front, &iv->head);
 CHECK(inner_map, pop_back, &iv->head);
 
 #undef CHECK
 
+#define CHECK(test, op, hexpr, nexpr)					\
+	SEC("?tc")							\
+	int test##_missing_lock_##op(void *ctx)				\
+	{								\
+		INIT;							\
+		void (*p)(void *, void *) = (void *)&bpf_list_##op;	\
+		p(hexpr, nexpr);					\
+		return 0;						\
+	}
+
+CHECK(kptr, push_front, &f->head, b);
+CHECK(kptr, push_back, &f->head, b);
+
+CHECK(global, push_front, &ghead, f);
+CHECK(global, push_back, &ghead, f);
+
+CHECK(map, push_front, &v->head, f);
+CHECK(map, push_back, &v->head, f);
+
+CHECK(inner_map, push_front, &iv->head, f);
+CHECK(inner_map, push_back, &iv->head, f);
+
+#undef CHECK
+
 #define CHECK(test, op, lexpr, hexpr)                       \
 	SEC("?tc")                                          \
 	int test##_incorrect_lock_##op(void *ctx)           \
@@ -108,11 +124,47 @@ CHECK(inner_map, pop_back, &iv->head);
 	CHECK(inner_map_global, op, &iv->lock, &ghead);        \
 	CHECK(inner_map_map, op, &iv->lock, &v->head);
 
-CHECK_OP(push_front);
-CHECK_OP(push_back);
 CHECK_OP(pop_front);
 CHECK_OP(pop_back);
 
+#undef CHECK
+#undef CHECK_OP
+
+#define CHECK(test, op, lexpr, hexpr, nexpr)				\
+	SEC("?tc")							\
+	int test##_incorrect_lock_##op(void *ctx)			\
+	{								\
+		INIT;							\
+		void (*p)(void *, void*) = (void *)&bpf_list_##op;	\
+		bpf_spin_lock(lexpr);					\
+		p(hexpr, nexpr);					\
+		return 0;						\
+	}
+
+#define CHECK_OP(op)							\
+	CHECK(kptr_kptr, op, &f1->lock, &f2->head, b);			\
+	CHECK(kptr_global, op, &f1->lock, &ghead, f);			\
+	CHECK(kptr_map, op, &f1->lock, &v->head, f);			\
+	CHECK(kptr_inner_map, op, &f1->lock, &iv->head, f);		\
+									\
+	CHECK(global_global, op, &glock2, &ghead, f);			\
+	CHECK(global_kptr, op, &glock, &f1->head, b);			\
+	CHECK(global_map, op, &glock, &v->head, f);			\
+	CHECK(global_inner_map, op, &glock, &iv->head, f);		\
+									\
+	CHECK(map_map, op, &v->lock, &v2->head, f);			\
+	CHECK(map_kptr, op, &v->lock, &f2->head, b);			\
+	CHECK(map_global, op, &v->lock, &ghead, f);			\
+	CHECK(map_inner_map, op, &v->lock, &iv->head, f);		\
+									\
+	CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, f);	\
+	CHECK(inner_map_kptr, op, &iv->lock, &f2->head, b);		\
+	CHECK(inner_map_global, op, &iv->lock, &ghead, f);		\
+	CHECK(inner_map_map, op, &iv->lock, &v->head, f);
+
+CHECK_OP(push_front);
+CHECK_OP(push_back);
+
 #undef CHECK
 #undef CHECK_OP
 #undef INIT
@@ -303,34 +355,6 @@ int direct_write_node(void *ctx)
 	return 0;
 }
 
-static __always_inline
-int write_after_op(void (*push_op)(void *head, void *node))
-{
-	struct foo *f;
-
-	f = bpf_obj_new(typeof(*f));
-	if (!f)
-		return 0;
-	bpf_spin_lock(&glock);
-	push_op(&ghead, &f->node);
-	f->data = 42;
-	bpf_spin_unlock(&glock);
-
-	return 0;
-}
-
-SEC("?tc")
-int write_after_push_front(void *ctx)
-{
-	return write_after_op((void *)bpf_list_push_front);
-}
-
-SEC("?tc")
-int write_after_push_back(void *ctx)
-{
-	return write_after_op((void *)bpf_list_push_back);
-}
-
 static __always_inline
 int use_after_unlock(void (*op)(void *head, void *node))
 {
-- 
cgit v1.2.3


From 9c395c1b99bd23f74bc628fa000480c49593d17f Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Mon, 13 Feb 2023 16:40:10 -0800
Subject: bpf: Add basic bpf_rb_{root,node} support

This patch adds special BPF_RB_{ROOT,NODE} btf_field_types similar to
BPF_LIST_{HEAD,NODE}, adds the necessary plumbing to detect the new
types, and adds bpf_rb_root_free function for freeing bpf_rb_root in
map_values.

structs bpf_rb_root and bpf_rb_node are opaque types meant to
obscure structs rb_root_cached rb_node, respectively.

btf_struct_access will prevent BPF programs from touching these special
fields automatically now that they're recognized.

btf_check_and_fixup_fields now groups list_head and rb_root together as
"graph root" fields and {list,rb}_node as "graph node", and does same
ownership cycle checking as before. Note that this function does _not_
prevent ownership type mixups (e.g. rb_root owning list_node) - that's
handled by btf_parse_graph_root.

After this patch, a bpf program can have a struct bpf_rb_root in a
map_value, but not add anything to nor do anything useful with it.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h                                |  20 ++-
 include/uapi/linux/bpf.h                           |  11 ++
 kernel/bpf/btf.c                                   | 162 ++++++++++++++-------
 kernel/bpf/helpers.c                               |  40 +++++
 kernel/bpf/syscall.c                               |  28 ++--
 kernel/bpf/verifier.c                              |   5 +-
 tools/include/uapi/linux/bpf.h                     |  11 ++
 .../testing/selftests/bpf/prog_tests/linked_list.c |  12 +-
 8 files changed, 216 insertions(+), 73 deletions(-)

(limited to 'tools/testing')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8b5d0b4c4ada..be34f7deb6c3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -181,7 +181,10 @@ enum btf_field_type {
 	BPF_KPTR       = BPF_KPTR_UNREF | BPF_KPTR_REF,
 	BPF_LIST_HEAD  = (1 << 4),
 	BPF_LIST_NODE  = (1 << 5),
-	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD,
+	BPF_RB_ROOT    = (1 << 6),
+	BPF_RB_NODE    = (1 << 7),
+	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
+				 BPF_RB_NODE | BPF_RB_ROOT,
 };
 
 struct btf_field_kptr {
@@ -285,6 +288,10 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "bpf_list_head";
 	case BPF_LIST_NODE:
 		return "bpf_list_node";
+	case BPF_RB_ROOT:
+		return "bpf_rb_root";
+	case BPF_RB_NODE:
+		return "bpf_rb_node";
 	default:
 		WARN_ON_ONCE(1);
 		return "unknown";
@@ -305,6 +312,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 		return sizeof(struct bpf_list_head);
 	case BPF_LIST_NODE:
 		return sizeof(struct bpf_list_node);
+	case BPF_RB_ROOT:
+		return sizeof(struct bpf_rb_root);
+	case BPF_RB_NODE:
+		return sizeof(struct bpf_rb_node);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -325,6 +336,10 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 		return __alignof__(struct bpf_list_head);
 	case BPF_LIST_NODE:
 		return __alignof__(struct bpf_list_node);
+	case BPF_RB_ROOT:
+		return __alignof__(struct bpf_rb_root);
+	case BPF_RB_NODE:
+		return __alignof__(struct bpf_rb_node);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -435,6 +450,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 void bpf_timer_cancel_and_free(void *timer);
 void bpf_list_head_free(const struct btf_field *field, void *list_head,
 			struct bpf_spin_lock *spin_lock);
+void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
+		      struct bpf_spin_lock *spin_lock);
+
 
 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 17afd2b35ee5..1503f61336b6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6917,6 +6917,17 @@ struct bpf_list_node {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_rb_root {
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
+struct bpf_rb_node {
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 740bdb045b14..b9d1f5c4e316 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3324,12 +3324,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf,
 	return NULL;
 }
 
-static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
-			      const struct btf_type *t, int comp_idx,
-			      u32 off, int sz, struct btf_field_info *info)
+static int
+btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
+		    const struct btf_type *t, int comp_idx, u32 off,
+		    int sz, struct btf_field_info *info,
+		    enum btf_field_type head_type)
 {
+	const char *node_field_name;
 	const char *value_type;
-	const char *list_node;
 	s32 id;
 
 	if (!__btf_type_is_struct(t))
@@ -3339,26 +3341,32 @@ static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
 	value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
 	if (!value_type)
 		return -EINVAL;
-	list_node = strstr(value_type, ":");
-	if (!list_node)
+	node_field_name = strstr(value_type, ":");
+	if (!node_field_name)
 		return -EINVAL;
-	value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN);
+	value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
 	if (!value_type)
 		return -ENOMEM;
 	id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
 	kfree(value_type);
 	if (id < 0)
 		return id;
-	list_node++;
-	if (str_is_empty(list_node))
+	node_field_name++;
+	if (str_is_empty(node_field_name))
 		return -EINVAL;
-	info->type = BPF_LIST_HEAD;
+	info->type = head_type;
 	info->off = off;
 	info->graph_root.value_btf_id = id;
-	info->graph_root.node_name = list_node;
+	info->graph_root.node_name = node_field_name;
 	return BTF_FIELD_FOUND;
 }
 
+#define field_mask_test_name(field_type, field_type_str) \
+	if (field_mask & field_type && !strcmp(name, field_type_str)) { \
+		type = field_type;					\
+		goto end;						\
+	}
+
 static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
 			      int *align, int *sz)
 {
@@ -3382,18 +3390,11 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
 			goto end;
 		}
 	}
-	if (field_mask & BPF_LIST_HEAD) {
-		if (!strcmp(name, "bpf_list_head")) {
-			type = BPF_LIST_HEAD;
-			goto end;
-		}
-	}
-	if (field_mask & BPF_LIST_NODE) {
-		if (!strcmp(name, "bpf_list_node")) {
-			type = BPF_LIST_NODE;
-			goto end;
-		}
-	}
+	field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
+	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
+	field_mask_test_name(BPF_RB_ROOT,   "bpf_rb_root");
+	field_mask_test_name(BPF_RB_NODE,   "bpf_rb_node");
+
 	/* Only return BPF_KPTR when all other types with matchable names fail */
 	if (field_mask & BPF_KPTR) {
 		type = BPF_KPTR_REF;
@@ -3406,6 +3407,8 @@ end:
 	return type;
 }
 
+#undef field_mask_test_name
+
 static int btf_find_struct_field(const struct btf *btf,
 				 const struct btf_type *t, u32 field_mask,
 				 struct btf_field_info *info, int info_cnt)
@@ -3438,6 +3441,7 @@ static int btf_find_struct_field(const struct btf *btf,
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
 		case BPF_LIST_NODE:
+		case BPF_RB_NODE:
 			ret = btf_find_struct(btf, member_type, off, sz, field_type,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3451,8 +3455,11 @@ static int btf_find_struct_field(const struct btf *btf,
 				return ret;
 			break;
 		case BPF_LIST_HEAD:
-			ret = btf_find_list_head(btf, t, member_type, i, off, sz,
-						 idx < info_cnt ? &info[idx] : &tmp);
+		case BPF_RB_ROOT:
+			ret = btf_find_graph_root(btf, t, member_type,
+						  i, off, sz,
+						  idx < info_cnt ? &info[idx] : &tmp,
+						  field_type);
 			if (ret < 0)
 				return ret;
 			break;
@@ -3499,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
 		case BPF_LIST_NODE:
+		case BPF_RB_NODE:
 			ret = btf_find_struct(btf, var_type, off, sz, field_type,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3512,8 +3520,11 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 				return ret;
 			break;
 		case BPF_LIST_HEAD:
-			ret = btf_find_list_head(btf, var, var_type, -1, off, sz,
-						 idx < info_cnt ? &info[idx] : &tmp);
+		case BPF_RB_ROOT:
+			ret = btf_find_graph_root(btf, var, var_type,
+						  -1, off, sz,
+						  idx < info_cnt ? &info[idx] : &tmp,
+						  field_type);
 			if (ret < 0)
 				return ret;
 			break;
@@ -3615,8 +3626,11 @@ end_btf:
 	return ret;
 }
 
-static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
-			       struct btf_field_info *info)
+static int btf_parse_graph_root(const struct btf *btf,
+				struct btf_field *field,
+				struct btf_field_info *info,
+				const char *node_type_name,
+				size_t node_type_align)
 {
 	const struct btf_type *t, *n = NULL;
 	const struct btf_member *member;
@@ -3638,13 +3652,13 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
 		n = btf_type_by_id(btf, member->type);
 		if (!__btf_type_is_struct(n))
 			return -EINVAL;
-		if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off)))
+		if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off)))
 			return -EINVAL;
 		offset = __btf_member_bit_offset(n, member);
 		if (offset % 8)
 			return -EINVAL;
 		offset /= 8;
-		if (offset % __alignof__(struct bpf_list_node))
+		if (offset % node_type_align)
 			return -EINVAL;
 
 		field->graph_root.btf = (struct btf *)btf;
@@ -3656,6 +3670,20 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
 	return 0;
 }
 
+static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
+			       struct btf_field_info *info)
+{
+	return btf_parse_graph_root(btf, field, info, "bpf_list_node",
+					    __alignof__(struct bpf_list_node));
+}
+
+static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
+			     struct btf_field_info *info)
+{
+	return btf_parse_graph_root(btf, field, info, "bpf_rb_node",
+					    __alignof__(struct bpf_rb_node));
+}
+
 struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
 				    u32 field_mask, u32 value_size)
 {
@@ -3718,7 +3746,13 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			if (ret < 0)
 				goto end;
 			break;
+		case BPF_RB_ROOT:
+			ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]);
+			if (ret < 0)
+				goto end;
+			break;
 		case BPF_LIST_NODE:
+		case BPF_RB_NODE:
 			break;
 		default:
 			ret = -EFAULT;
@@ -3727,8 +3761,9 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 		rec->cnt++;
 	}
 
-	/* bpf_list_head requires bpf_spin_lock */
-	if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) {
+	/* bpf_{list_head, rb_node} require bpf_spin_lock */
+	if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
+	     btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
 		ret = -EINVAL;
 		goto end;
 	}
@@ -3739,22 +3774,28 @@ end:
 	return ERR_PTR(ret);
 }
 
+#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
+#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
+
 int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
 {
 	int i;
 
-	/* There are two owning types, kptr_ref and bpf_list_head. The former
-	 * only supports storing kernel types, which can never store references
-	 * to program allocated local types, atleast not yet. Hence we only need
-	 * to ensure that bpf_list_head ownership does not form cycles.
+	/* There are three types that signify ownership of some other type:
+	 *  kptr_ref, bpf_list_head, bpf_rb_root.
+	 * kptr_ref only supports storing kernel types, which can't store
+	 * references to program allocated local types.
+	 *
+	 * Hence we only need to ensure that bpf_{list_head,rb_root} ownership
+	 * does not form cycles.
 	 */
-	if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD))
+	if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
 		return 0;
 	for (i = 0; i < rec->cnt; i++) {
 		struct btf_struct_meta *meta;
 		u32 btf_id;
 
-		if (!(rec->fields[i].type & BPF_LIST_HEAD))
+		if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
 			continue;
 		btf_id = rec->fields[i].graph_root.value_btf_id;
 		meta = btf_find_struct_meta(btf, btf_id);
@@ -3762,39 +3803,47 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
 			return -EFAULT;
 		rec->fields[i].graph_root.value_rec = meta->record;
 
-		if (!(rec->field_mask & BPF_LIST_NODE))
+		/* We need to set value_rec for all root types, but no need
+		 * to check ownership cycle for a type unless it's also a
+		 * node type.
+		 */
+		if (!(rec->field_mask & GRAPH_NODE_MASK))
 			continue;
 
 		/* We need to ensure ownership acyclicity among all types. The
 		 * proper way to do it would be to topologically sort all BTF
 		 * IDs based on the ownership edges, since there can be multiple
-		 * bpf_list_head in a type. Instead, we use the following
-		 * reasoning:
+		 * bpf_{list_head,rb_node} in a type. Instead, we use the
+		 * following resaoning:
 		 *
 		 * - A type can only be owned by another type in user BTF if it
-		 *   has a bpf_list_node.
+		 *   has a bpf_{list,rb}_node. Let's call these node types.
 		 * - A type can only _own_ another type in user BTF if it has a
-		 *   bpf_list_head.
+		 *   bpf_{list_head,rb_root}. Let's call these root types.
 		 *
-		 * We ensure that if a type has both bpf_list_head and
-		 * bpf_list_node, its element types cannot be owning types.
+		 * We ensure that if a type is both a root and node, its
+		 * element types cannot be root types.
 		 *
 		 * To ensure acyclicity:
 		 *
-		 * When A only has bpf_list_head, ownership chain can be:
+		 * When A is an root type but not a node, its ownership
+		 * chain can be:
 		 *	A -> B -> C
 		 * Where:
-		 * - B has both bpf_list_head and bpf_list_node.
-		 * - C only has bpf_list_node.
+		 * - A is an root, e.g. has bpf_rb_root.
+		 * - B is both a root and node, e.g. has bpf_rb_node and
+		 *   bpf_list_head.
+		 * - C is only an root, e.g. has bpf_list_node
 		 *
-		 * When A has both bpf_list_head and bpf_list_node, some other
-		 * type already owns it in the BTF domain, hence it can not own
-		 * another owning type through any of the bpf_list_head edges.
+		 * When A is both a root and node, some other type already
+		 * owns it in the BTF domain, hence it can not own
+		 * another root type through any of the ownership edges.
 		 *	A -> B
 		 * Where:
-		 * - B only has bpf_list_node.
+		 * - A is both an root and node.
+		 * - B is only an node.
 		 */
-		if (meta->record->field_mask & BPF_LIST_HEAD)
+		if (meta->record->field_mask & GRAPH_ROOT_MASK)
 			return -ELOOP;
 	}
 	return 0;
@@ -5256,6 +5305,8 @@ static const char *alloc_obj_fields[] = {
 	"bpf_spin_lock",
 	"bpf_list_head",
 	"bpf_list_node",
+	"bpf_rb_root",
+	"bpf_rb_node",
 };
 
 static struct btf_struct_metas *
@@ -5329,7 +5380,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
 
 		type = &tab->types[tab->cnt];
 		type->btf_id = i;
-		record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size);
+		record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
+						  BPF_RB_ROOT | BPF_RB_NODE, t->size);
 		/* The record cannot be unset, treat it as an error if so */
 		if (IS_ERR_OR_NULL(record)) {
 			ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2dae44581922..192184b5156e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1772,6 +1772,46 @@ unlock:
 	}
 }
 
+/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
+ * 'rb_node *', so field name of rb_node within containing struct is not
+ * needed.
+ *
+ * Since bpf_rb_tree's node type has a corresponding struct btf_field with
+ * graph_root.node_offset, it's not necessary to know field name
+ * or type of node struct
+ */
+#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
+	for (pos = rb_first_postorder(root); \
+	    pos && ({ n = rb_next_postorder(pos); 1; }); \
+	    pos = n)
+
+void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
+		      struct bpf_spin_lock *spin_lock)
+{
+	struct rb_root_cached orig_root, *root = rb_root;
+	struct rb_node *pos, *n;
+	void *obj;
+
+	BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
+	BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
+
+	__bpf_spin_lock_irqsave(spin_lock);
+	orig_root = *root;
+	*root = RB_ROOT_CACHED;
+	__bpf_spin_unlock_irqrestore(spin_lock);
+
+	bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
+		obj = pos;
+		obj -= field->graph_root.node_offset;
+
+		bpf_obj_free_fields(field->graph_root.value_rec, obj);
+
+		migrate_disable();
+		bpf_mem_free(&bpf_global_ma, obj);
+		migrate_enable();
+	}
+}
+
 __diag_push();
 __diag_ignore_all("-Wmissing-prototypes",
 		  "Global functions as their definitions will be in vmlinux BTF");
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cda8d00f3762..e3fcdc9836a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -537,9 +537,6 @@ void btf_record_free(struct btf_record *rec)
 		return;
 	for (i = 0; i < rec->cnt; i++) {
 		switch (rec->fields[i].type) {
-		case BPF_SPIN_LOCK:
-		case BPF_TIMER:
-			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
 			if (rec->fields[i].kptr.module)
@@ -548,7 +545,11 @@ void btf_record_free(struct btf_record *rec)
 			break;
 		case BPF_LIST_HEAD:
 		case BPF_LIST_NODE:
-			/* Nothing to release for bpf_list_head */
+		case BPF_RB_ROOT:
+		case BPF_RB_NODE:
+		case BPF_SPIN_LOCK:
+		case BPF_TIMER:
+			/* Nothing to release */
 			break;
 		default:
 			WARN_ON_ONCE(1);
@@ -581,9 +582,6 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 	new_rec->cnt = 0;
 	for (i = 0; i < rec->cnt; i++) {
 		switch (fields[i].type) {
-		case BPF_SPIN_LOCK:
-		case BPF_TIMER:
-			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
 			btf_get(fields[i].kptr.btf);
@@ -594,7 +592,11 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 			break;
 		case BPF_LIST_HEAD:
 		case BPF_LIST_NODE:
-			/* Nothing to acquire for bpf_list_head */
+		case BPF_RB_ROOT:
+		case BPF_RB_NODE:
+		case BPF_SPIN_LOCK:
+		case BPF_TIMER:
+			/* Nothing to acquire */
 			break;
 		default:
 			ret = -EFAULT;
@@ -674,7 +676,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 				continue;
 			bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
 			break;
+		case BPF_RB_ROOT:
+			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
+				continue;
+			bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
+			break;
 		case BPF_LIST_NODE:
+		case BPF_RB_NODE:
 			break;
 		default:
 			WARN_ON_ONCE(1);
@@ -1010,7 +1018,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		return -EINVAL;
 
 	map->record = btf_parse_fields(btf, value_type,
-				       BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD,
+				       BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
+				       BPF_RB_ROOT,
 				       map->value_size);
 	if (!IS_ERR_OR_NULL(map->record)) {
 		int i;
@@ -1058,6 +1067,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 				}
 				break;
 			case BPF_LIST_HEAD:
+			case BPF_RB_ROOT:
 				if (map->map_type != BPF_MAP_TYPE_HASH &&
 				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
 				    map->map_type != BPF_MAP_TYPE_ARRAY) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f176bc15c879..4fd098851f43 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14703,9 +14703,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 {
 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
 
-	if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
+	if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
+	    btf_record_has_field(map->record, BPF_RB_ROOT)) {
 		if (is_tracing_prog_type(prog_type)) {
-			verbose(env, "tracing progs cannot use bpf_list_head yet\n");
+			verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
 			return -EINVAL;
 		}
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 17afd2b35ee5..1503f61336b6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6917,6 +6917,17 @@ struct bpf_list_node {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_rb_root {
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
+struct bpf_rb_node {
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 2592b8aa5e41..c456b34a823a 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -58,12 +58,12 @@ static struct {
 	TEST(inner_map, pop_front)
 	TEST(inner_map, pop_back)
 #undef TEST
-	{ "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" },
-	{ "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" },
-	{ "map_compat_tp", "tracing progs cannot use bpf_list_head yet" },
-	{ "map_compat_perf", "tracing progs cannot use bpf_list_head yet" },
-	{ "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" },
-	{ "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" },
+	{ "map_compat_kprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+	{ "map_compat_kretprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+	{ "map_compat_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+	{ "map_compat_perf", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+	{ "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+	{ "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
 	{ "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
 	{ "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" },
 	{ "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" },
-- 
cgit v1.2.3


From a40d3632436b1677a94c16e77be8da798ee9e12b Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Mon, 13 Feb 2023 16:40:14 -0800
Subject: bpf: Special verifier handling for bpf_rbtree_{remove, first}

Newly-added bpf_rbtree_{remove,first} kfuncs have some special properties
that require handling in the verifier:

  * both bpf_rbtree_remove and bpf_rbtree_first return the type containing
    the bpf_rb_node field, with the offset set to that field's offset,
    instead of a struct bpf_rb_node *
    * mark_reg_graph_node helper added in previous patch generalizes
      this logic, use it

  * bpf_rbtree_remove's node input is a node that's been inserted
    in the tree - a non-owning reference.

  * bpf_rbtree_remove must invalidate non-owning references in order to
    avoid aliasing issue. Use previously-added
    invalidate_non_owning_refs helper to mark this function as a
    non-owning ref invalidation point.

  * Unlike other functions, which convert one of their input arg regs to
    non-owning reference, bpf_rbtree_first takes no arguments and just
    returns a non-owning reference (possibly null)
    * For now verifier logic for this is special-cased instead of
      adding new kfunc flag.

This patch, along with the previous one, complete special verifier
handling for all rbtree API functions added in this series.

With functional verifier handling of rbtree_remove, under current
non-owning reference scheme, a node type with both bpf_{list,rb}_node
fields could cause the verifier to accept programs which remove such
nodes from collections they haven't been added to.

In order to prevent this, this patch adds a check to btf_parse_fields
which rejects structs with both bpf_{list,rb}_node fields. This is a
temporary measure that can be removed after "collection identity"
followup. See comment added in btf_parse_fields. A linked_list BTF test
exercising the new check is added in this patch as well.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-6-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/btf.c                                   | 24 ++++++++++++
 kernel/bpf/verifier.c                              | 43 ++++++++++++++++------
 .../testing/selftests/bpf/prog_tests/linked_list.c | 37 +++++++++++++++++++
 3 files changed, 92 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index b9d1f5c4e316..6582735ef1fc 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3768,6 +3768,30 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 		goto end;
 	}
 
+	/* need collection identity for non-owning refs before allowing this
+	 *
+	 * Consider a node type w/ both list and rb_node fields:
+	 *   struct node {
+	 *     struct bpf_list_node l;
+	 *     struct bpf_rb_node r;
+	 *   }
+	 *
+	 * Used like so:
+	 *   struct node *n = bpf_obj_new(....);
+	 *   bpf_list_push_front(&list_head, &n->l);
+	 *   bpf_rbtree_remove(&rb_root, &n->r);
+	 *
+	 * It should not be possible to rbtree_remove the node since it hasn't
+	 * been added to a tree. But push_front converts n to a non-owning
+	 * reference, and rbtree_remove accepts the non-owning reference to
+	 * a type w/ bpf_rb_node field.
+	 */
+	if (btf_record_has_field(rec, BPF_LIST_NODE) &&
+	    btf_record_has_field(rec, BPF_RB_NODE)) {
+		ret = -EINVAL;
+		goto end;
+	}
+
 	return rec;
 end:
 	btf_record_free(rec);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 88c8edf67007..21e08c111702 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9682,14 +9682,26 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				return ret;
 			break;
 		case KF_ARG_PTR_TO_RB_NODE:
-			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
-				verbose(env, "arg#%d expected pointer to allocated object\n", i);
-				return -EINVAL;
-			}
-			if (!reg->ref_obj_id) {
-				verbose(env, "allocated object must be referenced\n");
-				return -EINVAL;
+			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
+				if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
+					verbose(env, "rbtree_remove node input must be non-owning ref\n");
+					return -EINVAL;
+				}
+				if (in_rbtree_lock_required_cb(env)) {
+					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+					return -EINVAL;
+				}
+			} else {
+				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+					verbose(env, "arg#%d expected pointer to allocated object\n", i);
+					return -EINVAL;
+				}
+				if (!reg->ref_obj_id) {
+					verbose(env, "allocated object must be referenced\n");
+					return -EINVAL;
+				}
 			}
+
 			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
 			if (ret < 0)
 				return ret;
@@ -9940,11 +9952,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
 				struct btf_field *field = meta.arg_list_head.field;
 
-				mark_reg_known_zero(env, regs, BPF_REG_0);
-				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
-				regs[BPF_REG_0].btf = field->graph_root.btf;
-				regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
-				regs[BPF_REG_0].off = field->graph_root.node_offset;
+				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+				struct btf_field *field = meta.arg_rbtree_root.field;
+
+				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
 			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
 				mark_reg_known_zero(env, regs, BPF_REG_0);
 				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
@@ -10010,7 +10023,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			if (is_kfunc_ret_null(&meta))
 				regs[BPF_REG_0].id = id;
 			regs[BPF_REG_0].ref_obj_id = id;
+		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+			ref_set_non_owning(env, &regs[BPF_REG_0]);
 		}
+
+		if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
+			invalidate_non_owning_refs(env);
+
 		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
 			regs[BPF_REG_0].id = ++env->id_gen;
 	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index c456b34a823a..0ed8132ce1c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -715,6 +715,43 @@ static void test_btf(void)
 		btf__free(btf);
 		break;
 	}
+
+	while (test__start_subtest("btf: list_node and rb_node in same struct")) {
+		btf = init_btf();
+		if (!ASSERT_OK_PTR(btf, "init_btf"))
+			break;
+
+		id = btf__add_struct(btf, "bpf_rb_node", 24);
+		if (!ASSERT_EQ(id, 5, "btf__add_struct bpf_rb_node"))
+			break;
+		id = btf__add_struct(btf, "bar", 40);
+		if (!ASSERT_EQ(id, 6, "btf__add_struct bar"))
+			break;
+		err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field bar::a"))
+			break;
+		err = btf__add_field(btf, "c", 5, 128, 0);
+		if (!ASSERT_OK(err, "btf__add_field bar::c"))
+			break;
+
+		id = btf__add_struct(btf, "foo", 20);
+		if (!ASSERT_EQ(id, 7, "btf__add_struct foo"))
+			break;
+		err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::a"))
+			break;
+		err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+		if (!ASSERT_OK(err, "btf__add_field foo::b"))
+			break;
+		id = btf__add_decl_tag(btf, "contains:bar:a", 7, 0);
+		if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:bar:a"))
+			break;
+
+		err = btf__load_into_kernel(btf);
+		ASSERT_EQ(err, -EINVAL, "check btf");
+		btf__free(btf);
+		break;
+	}
 }
 
 void test_linked_list(void)
-- 
cgit v1.2.3


From c834df847ee60eeb678171eb0f1e59f611c62a99 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Mon, 13 Feb 2023 16:40:15 -0800
Subject: bpf: Add bpf_rbtree_{add,remove,first} decls to bpf_experimental.h

These kfuncs will be used by selftests in following patches

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-7-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_experimental.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 424f7bbbfe9b..dbd2c729781a 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -65,4 +65,28 @@ extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ks
  */
 extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
 
+/* Description
+ *	Remove 'node' from rbtree with root 'root'
+ * Returns
+ * 	Pointer to the removed node, or NULL if 'root' didn't contain 'node'
+ */
+extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+					     struct bpf_rb_node *node) __ksym;
+
+/* Description
+ *	Add 'node' to rbtree with root 'root' using comparator 'less'
+ * Returns
+ *	Nothing
+ */
+extern void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+			   bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) __ksym;
+
+/* Description
+ *	Return the first (leftmost) node in input tree
+ * Returns
+ *	Pointer to the node, which is _not_ removed from the tree. If the tree
+ *	contains no nodes, returns NULL.
+ */
+extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
+
 #endif
-- 
cgit v1.2.3


From 215249f6adc0359e3546829e7ee622b5e309b0ad Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Mon, 13 Feb 2023 16:40:16 -0800
Subject: selftests/bpf: Add rbtree selftests

This patch adds selftests exercising the logic changed/added in the
previous patches in the series. A variety of successful and unsuccessful
rbtree usages are validated:

Success:
  * Add some nodes, let map_value bpf_rbtree_root destructor clean them
    up
  * Add some nodes, remove one using the non-owning ref leftover by
    successful rbtree_add() call
  * Add some nodes, remove one using the non-owning ref returned by
    rbtree_first() call

Failure:
  * BTF where bpf_rb_root owns bpf_list_node should fail to load
  * BTF where node of type X is added to tree containing nodes of type Y
    should fail to load
  * No calling rbtree api functions in 'less' callback for rbtree_add
  * No releasing lock in 'less' callback for rbtree_add
  * No removing a node which hasn't been added to any tree
  * No adding a node which has already been added to a tree
  * No escaping of non-owning references past their lock's
    critical section
  * No escaping of non-owning references past other invalidation points
    (rbtree_remove)

These tests mostly focus on rbtree-specific additions, but some of the
failure cases revalidate scenarios common to both linked_list and rbtree
which are covered in the former's tests. Better to be a bit redundant in
case linked_list and rbtree semantics deviate over time.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230214004017.2534011-8-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/rbtree.c    | 117 ++++++++
 tools/testing/selftests/bpf/progs/rbtree.c         | 176 +++++++++++
 .../bpf/progs/rbtree_btf_fail__add_wrong_type.c    |  52 ++++
 .../bpf/progs/rbtree_btf_fail__wrong_node_type.c   |  49 ++++
 tools/testing/selftests/bpf/progs/rbtree_fail.c    | 322 +++++++++++++++++++++
 5 files changed, 716 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/rbtree.c
 create mode 100644 tools/testing/selftests/bpf/progs/rbtree.c
 create mode 100644 tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
 create mode 100644 tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
 create mode 100644 tools/testing/selftests/bpf/progs/rbtree_fail.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
new file mode 100644
index 000000000000..156fa95c42f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "rbtree.skel.h"
+#include "rbtree_fail.skel.h"
+#include "rbtree_btf_fail__wrong_node_type.skel.h"
+#include "rbtree_btf_fail__add_wrong_type.skel.h"
+
+static void test_rbtree_add_nodes(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		    .data_in = &pkt_v4,
+		    .data_size_in = sizeof(pkt_v4),
+		    .repeat = 1,
+	);
+	struct rbtree *skel;
+	int ret;
+
+	skel = rbtree__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+		return;
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes), &opts);
+	ASSERT_OK(ret, "rbtree_add_nodes run");
+	ASSERT_OK(opts.retval, "rbtree_add_nodes retval");
+	ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes less_callback_ran");
+
+	rbtree__destroy(skel);
+}
+
+static void test_rbtree_add_and_remove(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		    .data_in = &pkt_v4,
+		    .data_size_in = sizeof(pkt_v4),
+		    .repeat = 1,
+	);
+	struct rbtree *skel;
+	int ret;
+
+	skel = rbtree__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+		return;
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove), &opts);
+	ASSERT_OK(ret, "rbtree_add_and_remove");
+	ASSERT_OK(opts.retval, "rbtree_add_and_remove retval");
+	ASSERT_EQ(skel->data->removed_key, 5, "rbtree_add_and_remove first removed key");
+
+	rbtree__destroy(skel);
+}
+
+static void test_rbtree_first_and_remove(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		    .data_in = &pkt_v4,
+		    .data_size_in = sizeof(pkt_v4),
+		    .repeat = 1,
+	);
+	struct rbtree *skel;
+	int ret;
+
+	skel = rbtree__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+		return;
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_first_and_remove), &opts);
+	ASSERT_OK(ret, "rbtree_first_and_remove");
+	ASSERT_OK(opts.retval, "rbtree_first_and_remove retval");
+	ASSERT_EQ(skel->data->first_data[0], 2, "rbtree_first_and_remove first rbtree_first()");
+	ASSERT_EQ(skel->data->removed_key, 1, "rbtree_first_and_remove first removed key");
+	ASSERT_EQ(skel->data->first_data[1], 4, "rbtree_first_and_remove second rbtree_first()");
+
+	rbtree__destroy(skel);
+}
+
+void test_rbtree_success(void)
+{
+	if (test__start_subtest("rbtree_add_nodes"))
+		test_rbtree_add_nodes();
+	if (test__start_subtest("rbtree_add_and_remove"))
+		test_rbtree_add_and_remove();
+	if (test__start_subtest("rbtree_first_and_remove"))
+		test_rbtree_first_and_remove();
+}
+
+#define BTF_FAIL_TEST(suffix)									\
+void test_rbtree_btf_fail__##suffix(void)							\
+{												\
+	struct rbtree_btf_fail__##suffix *skel;							\
+												\
+	skel = rbtree_btf_fail__##suffix##__open_and_load();					\
+	if (!ASSERT_ERR_PTR(skel,								\
+			    "rbtree_btf_fail__" #suffix "__open_and_load unexpected success"))	\
+		rbtree_btf_fail__##suffix##__destroy(skel);					\
+}
+
+#define RUN_BTF_FAIL_TEST(suffix)				\
+	if (test__start_subtest("rbtree_btf_fail__" #suffix))	\
+		test_rbtree_btf_fail__##suffix();
+
+BTF_FAIL_TEST(wrong_node_type);
+BTF_FAIL_TEST(add_wrong_type);
+
+void test_rbtree_btf_fail(void)
+{
+	RUN_BTF_FAIL_TEST(wrong_node_type);
+	RUN_BTF_FAIL_TEST(add_wrong_type);
+}
+
+void test_rbtree_fail(void)
+{
+	RUN_TESTS(rbtree_fail);
+}
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
new file mode 100644
index 000000000000..e5db1a4287e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+	long key;
+	long data;
+	struct bpf_rb_node node;
+};
+
+long less_callback_ran = -1;
+long removed_key = -1;
+long first_data[2] = {-1, -1};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_data *node_a;
+	struct node_data *node_b;
+
+	node_a = container_of(a, struct node_data, node);
+	node_b = container_of(b, struct node_data, node);
+	less_callback_ran = 1;
+
+	return node_a->key < node_b->key;
+}
+
+static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
+{
+	struct node_data *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+	n->key = 5;
+
+	m = bpf_obj_new(typeof(*m));
+	if (!m) {
+		bpf_obj_drop(n);
+		return 2;
+	}
+	m->key = 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_rbtree_add(&groot, &m->node, less);
+	bpf_spin_unlock(&glock);
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 3;
+	n->key = 3;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+SEC("tc")
+long rbtree_add_nodes(void *ctx)
+{
+	return __add_three(&groot, &glock);
+}
+
+SEC("tc")
+long rbtree_add_and_remove(void *ctx)
+{
+	struct bpf_rb_node *res = NULL;
+	struct node_data *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		goto err_out;
+	n->key = 5;
+
+	m = bpf_obj_new(typeof(*m));
+	if (!m)
+		goto err_out;
+	m->key = 3;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_rbtree_add(&groot, &m->node, less);
+	res = bpf_rbtree_remove(&groot, &n->node);
+	bpf_spin_unlock(&glock);
+
+	n = container_of(res, struct node_data, node);
+	removed_key = n->key;
+
+	bpf_obj_drop(n);
+
+	return 0;
+err_out:
+	if (n)
+		bpf_obj_drop(n);
+	if (m)
+		bpf_obj_drop(m);
+	return 1;
+}
+
+SEC("tc")
+long rbtree_first_and_remove(void *ctx)
+{
+	struct bpf_rb_node *res = NULL;
+	struct node_data *n, *m, *o;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+	n->key = 3;
+	n->data = 4;
+
+	m = bpf_obj_new(typeof(*m));
+	if (!m)
+		goto err_out;
+	m->key = 5;
+	m->data = 6;
+
+	o = bpf_obj_new(typeof(*o));
+	if (!o)
+		goto err_out;
+	o->key = 1;
+	o->data = 2;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_rbtree_add(&groot, &m->node, less);
+	bpf_rbtree_add(&groot, &o->node, less);
+
+	res = bpf_rbtree_first(&groot);
+	if (!res) {
+		bpf_spin_unlock(&glock);
+		return 2;
+	}
+
+	o = container_of(res, struct node_data, node);
+	first_data[0] = o->data;
+
+	res = bpf_rbtree_remove(&groot, &o->node);
+	bpf_spin_unlock(&glock);
+
+	o = container_of(res, struct node_data, node);
+	removed_key = o->key;
+
+	bpf_obj_drop(o);
+
+	bpf_spin_lock(&glock);
+	res = bpf_rbtree_first(&groot);
+	if (!res) {
+		bpf_spin_unlock(&glock);
+		return 3;
+	}
+
+	o = container_of(res, struct node_data, node);
+	first_data[1] = o->data;
+	bpf_spin_unlock(&glock);
+
+	return 0;
+err_out:
+	if (n)
+		bpf_obj_drop(n);
+	if (m)
+		bpf_obj_drop(m);
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
new file mode 100644
index 000000000000..60079b202c07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+	int key;
+	int data;
+	struct bpf_rb_node node;
+};
+
+struct node_data2 {
+	int key;
+	struct bpf_rb_node node;
+	int data;
+};
+
+static bool less2(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_data2 *node_a;
+	struct node_data2 *node_b;
+
+	node_a = container_of(a, struct node_data2, node);
+	node_b = container_of(b, struct node_data2, node);
+
+	return node_a->key < node_b->key;
+}
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__add_wrong_type(void *ctx)
+{
+	struct node_data2 *n;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less2);
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
new file mode 100644
index 000000000000..340f97da1084
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+/* BTF load should fail as bpf_rb_root __contains this type and points to
+ * 'node', but 'node' is not a bpf_rb_node
+ */
+struct node_data {
+	int key;
+	int data;
+	struct bpf_list_node node;
+};
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_data *node_a;
+	struct node_data *node_b;
+
+	node_a = container_of(a, struct node_data, node);
+	node_b = container_of(b, struct node_data, node);
+
+	return node_a->key < node_b->key;
+}
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__wrong_node_type(void *ctx)
+{
+	struct node_data *n;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_first(&groot);
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
new file mode 100644
index 000000000000..bf3cba115897
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_data {
+	long key;
+	long data;
+	struct bpf_rb_node node;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+private(A) struct bpf_rb_root groot2 __contains(node_data, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_data *node_a;
+	struct node_data *node_b;
+
+	node_a = container_of(a, struct node_data, node);
+	node_b = container_of(b, struct node_data, node);
+
+	return node_a->key < node_b->key;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_add(void *ctx)
+{
+	struct node_data *n;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_rbtree_add(&groot, &n->node, less);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_remove(void *ctx)
+{
+	struct node_data *n;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_spin_unlock(&glock);
+
+	bpf_rbtree_remove(&groot, &n->node);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_first(void *ctx)
+{
+	bpf_rbtree_first(&groot);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_remove_unadded_node(void *ctx)
+{
+	struct node_data *n, *m;
+	struct bpf_rb_node *res;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	m = bpf_obj_new(typeof(*m));
+	if (!m) {
+		bpf_obj_drop(n);
+		return 1;
+	}
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+
+	/* This remove should pass verifier */
+	res = bpf_rbtree_remove(&groot, &n->node);
+	n = container_of(res, struct node_data, node);
+
+	/* This remove shouldn't, m isn't in an rbtree */
+	res = bpf_rbtree_remove(&groot, &m->node);
+	m = container_of(res, struct node_data, node);
+	bpf_spin_unlock(&glock);
+
+	if (n)
+		bpf_obj_drop(n);
+	if (m)
+		bpf_obj_drop(m);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=2 alloc_insn=11")
+long rbtree_api_remove_no_drop(void *ctx)
+{
+	struct bpf_rb_node *res;
+	struct node_data *n;
+
+	bpf_spin_lock(&glock);
+	res = bpf_rbtree_first(&groot);
+	if (!res)
+		goto unlock_err;
+
+	res = bpf_rbtree_remove(&groot, res);
+
+	n = container_of(res, struct node_data, node);
+	bpf_spin_unlock(&glock);
+
+	/* bpf_obj_drop(n) is missing here */
+	return 0;
+
+unlock_err:
+	bpf_spin_unlock(&glock);
+	return 1;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_to_multiple_trees(void *ctx)
+{
+	struct node_data *n;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+
+	/* This add should fail since n already in groot's tree */
+	bpf_rbtree_add(&groot2, &n->node, less);
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_add_release_unlock_escape(void *ctx)
+{
+	struct node_data *n;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_spin_unlock(&glock);
+
+	bpf_spin_lock(&glock);
+	/* After add() in previous critical section, n should be
+	 * release_on_unlock and released after previous spin_unlock,
+	 * so should not be possible to use it here
+	 */
+	bpf_rbtree_remove(&groot, &n->node);
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_release_aliasing(void *ctx)
+{
+	struct node_data *n, *m, *o;
+	struct bpf_rb_node *res;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, less);
+	bpf_spin_unlock(&glock);
+
+	bpf_spin_lock(&glock);
+
+	/* m and o point to the same node,
+	 * but verifier doesn't know this
+	 */
+	res = bpf_rbtree_first(&groot);
+	if (!res)
+		return 1;
+	o = container_of(res, struct node_data, node);
+
+	res = bpf_rbtree_first(&groot);
+	if (!res)
+		return 1;
+	m = container_of(res, struct node_data, node);
+
+	bpf_rbtree_remove(&groot, &m->node);
+	/* This second remove shouldn't be possible. Retval of previous
+	 * remove returns owning reference to m, which is the same
+	 * node o's non-owning ref is pointing at
+	 *
+	 * In order to preserve property
+	 *   * owning ref must not be in rbtree
+	 *   * non-owning ref must be in rbtree
+	 *
+	 * o's ref must be invalidated after previous remove. Otherwise
+	 * we'd have non-owning ref to node that isn't in rbtree, and
+	 * verifier wouldn't be able to use type system to prevent remove
+	 * of ref that already isn't in any tree. Would have to do runtime
+	 * checks in that case.
+	 */
+	bpf_rbtree_remove(&groot, &o->node);
+
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_first_release_unlock_escape(void *ctx)
+{
+	struct bpf_rb_node *res;
+	struct node_data *n;
+
+	bpf_spin_lock(&glock);
+	res = bpf_rbtree_first(&groot);
+	if (res)
+		n = container_of(res, struct node_data, node);
+	bpf_spin_unlock(&glock);
+
+	bpf_spin_lock(&glock);
+	/* After first() in previous critical section, n should be
+	 * release_on_unlock and released after previous spin_unlock,
+	 * so should not be possible to use it here
+	 */
+	bpf_rbtree_remove(&groot, &n->node);
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+static bool less__bad_fn_call_add(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_data *node_a;
+	struct node_data *node_b;
+
+	node_a = container_of(a, struct node_data, node);
+	node_b = container_of(b, struct node_data, node);
+	bpf_rbtree_add(&groot, &node_a->node, less);
+
+	return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_remove(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_data *node_a;
+	struct node_data *node_b;
+
+	node_a = container_of(a, struct node_data, node);
+	node_b = container_of(b, struct node_data, node);
+	bpf_rbtree_remove(&groot, &node_a->node);
+
+	return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_first_unlock_after(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct node_data *node_a;
+	struct node_data *node_b;
+
+	node_a = container_of(a, struct node_data, node);
+	node_b = container_of(b, struct node_data, node);
+	bpf_rbtree_first(&groot);
+	bpf_spin_unlock(&glock);
+
+	return node_a->key < node_b->key;
+}
+
+static __always_inline
+long add_with_cb(bool (cb)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+{
+	struct node_data *n;
+
+	n = bpf_obj_new(typeof(*n));
+	if (!n)
+		return 1;
+
+	bpf_spin_lock(&glock);
+	bpf_rbtree_add(&groot, &n->node, cb);
+	bpf_spin_unlock(&glock);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_bad_cb_bad_fn_call_add(void *ctx)
+{
+	return add_with_cb(less__bad_fn_call_add);
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove not allowed in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_remove(void *ctx)
+{
+	return add_with_cb(less__bad_fn_call_remove);
+}
+
+SEC("?tc")
+__failure __msg("can't spin_{lock,unlock} in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_first_unlock_after(void *ctx)
+{
+	return add_with_cb(less__bad_fn_call_first_unlock_after);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 8032cad1030279066ce4a1f82b76d0fe7eb578e2 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 13 Feb 2023 21:13:31 -0800
Subject: selftests/bpf: Clean up user_ringbuf, cgrp_kfunc, kfunc_dynptr_param
 tests

Clean up user_ringbuf, cgrp_kfunc, and kfunc_dynptr_param tests to use
the generic verification tester for checking verifier rejections.
The generic verification tester uses btf_decl_tag-based annotations
for verifying that the tests fail with the expected log messages.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Acked-by: David Vernet <void@manifault.com>
Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com>
Link: https://lore.kernel.org/r/20230214051332.4007131-1-joannelkoong@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/cgrp_kfunc.c  | 69 +--------------------
 .../selftests/bpf/prog_tests/kfunc_dynptr_param.c  | 72 +++++-----------------
 .../selftests/bpf/prog_tests/user_ringbuf.c        | 62 +------------------
 .../selftests/bpf/progs/cgrp_kfunc_failure.c       | 17 ++++-
 .../selftests/bpf/progs/test_kfunc_dynptr_param.c  |  4 ++
 .../selftests/bpf/progs/user_ringbuf_fail.c        | 31 +++++++---
 6 files changed, 58 insertions(+), 197 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index f3bb0e16e088..b3f7985c8504 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -8,9 +8,6 @@
 #include "cgrp_kfunc_failure.skel.h"
 #include "cgrp_kfunc_success.skel.h"
 
-static size_t log_buf_sz = 1 << 20; /* 1 MB */
-static char obj_log_buf[1048576];
-
 static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void)
 {
 	struct cgrp_kfunc_success *skel;
@@ -89,65 +86,6 @@ static const char * const success_tests[] = {
 	"test_cgrp_get_ancestors",
 };
 
-static struct {
-	const char *prog_name;
-	const char *expected_err_msg;
-} failure_tests[] = {
-	{"cgrp_kfunc_acquire_untrusted", "Possibly NULL pointer passed to trusted arg0"},
-	{"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"},
-	{"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
-	{"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
-	{"cgrp_kfunc_acquire_null", "Possibly NULL pointer passed to trusted arg0"},
-	{"cgrp_kfunc_acquire_unreleased", "Unreleased reference"},
-	{"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
-	{"cgrp_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
-	{"cgrp_kfunc_get_null", "arg#0 expected pointer to map value"},
-	{"cgrp_kfunc_xchg_unreleased", "Unreleased reference"},
-	{"cgrp_kfunc_get_unreleased", "Unreleased reference"},
-	{"cgrp_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"},
-	{"cgrp_kfunc_release_fp", "arg#0 pointer type STRUCT cgroup must point"},
-	{"cgrp_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
-	{"cgrp_kfunc_release_unacquired", "release kernel function bpf_cgroup_release expects"},
-};
-
-static void verify_fail(const char *prog_name, const char *expected_err_msg)
-{
-	LIBBPF_OPTS(bpf_object_open_opts, opts);
-	struct cgrp_kfunc_failure *skel;
-	int err, i;
-
-	opts.kernel_log_buf = obj_log_buf;
-	opts.kernel_log_size = log_buf_sz;
-	opts.kernel_log_level = 1;
-
-	skel = cgrp_kfunc_failure__open_opts(&opts);
-	if (!ASSERT_OK_PTR(skel, "cgrp_kfunc_failure__open_opts"))
-		goto cleanup;
-
-	for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
-		struct bpf_program *prog;
-		const char *curr_name = failure_tests[i].prog_name;
-
-		prog = bpf_object__find_program_by_name(skel->obj, curr_name);
-		if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
-			goto cleanup;
-
-		bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name));
-	}
-
-	err = cgrp_kfunc_failure__load(skel);
-	if (!ASSERT_ERR(err, "unexpected load success"))
-		goto cleanup;
-
-	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
-		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
-		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
-	}
-
-cleanup:
-	cgrp_kfunc_failure__destroy(skel);
-}
-
 void test_cgrp_kfunc(void)
 {
 	int i, err;
@@ -163,12 +101,7 @@ void test_cgrp_kfunc(void)
 		run_success_test(success_tests[i]);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
-		if (!test__start_subtest(failure_tests[i].prog_name))
-			continue;
-
-		verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
-	}
+	RUN_TESTS(cgrp_kfunc_failure);
 
 cleanup:
 	cleanup_cgroup_environment();
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
index 72800b1e8395..8cd298b78e44 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
@@ -10,17 +10,11 @@
 #include <test_progs.h>
 #include "test_kfunc_dynptr_param.skel.h"
 
-static size_t log_buf_sz = 1048576; /* 1 MB */
-static char obj_log_buf[1048576];
-
 static struct {
 	const char *prog_name;
-	const char *expected_verifier_err_msg;
 	int expected_runtime_err;
 } kfunc_dynptr_tests[] = {
-	{"not_valid_dynptr", "cannot pass in dynptr at an offset=-8", 0},
-	{"not_ptr_to_stack", "arg#0 expected pointer to stack or dynptr_ptr", 0},
-	{"dynptr_data_null", NULL, -EBADMSG},
+	{"dynptr_data_null", -EBADMSG},
 };
 
 static bool kfunc_not_supported;
@@ -38,29 +32,15 @@ static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
 	return 0;
 }
 
-static void verify_fail(const char *prog_name, const char *expected_err_msg)
+static bool has_pkcs7_kfunc_support(void)
 {
 	struct test_kfunc_dynptr_param *skel;
-	LIBBPF_OPTS(bpf_object_open_opts, opts);
 	libbpf_print_fn_t old_print_cb;
-	struct bpf_program *prog;
 	int err;
 
-	opts.kernel_log_buf = obj_log_buf;
-	opts.kernel_log_size = log_buf_sz;
-	opts.kernel_log_level = 1;
-
-	skel = test_kfunc_dynptr_param__open_opts(&opts);
-	if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open_opts"))
-		goto cleanup;
-
-	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
-	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
-		goto cleanup;
-
-	bpf_program__set_autoload(prog, true);
-
-	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+	skel = test_kfunc_dynptr_param__open();
+	if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open"))
+		return false;
 
 	kfunc_not_supported = false;
 
@@ -72,26 +52,18 @@ static void verify_fail(const char *prog_name, const char *expected_err_msg)
 		fprintf(stderr,
 		  "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
 		  __func__);
-		test__skip();
-		goto cleanup;
-	}
-
-	if (!ASSERT_ERR(err, "unexpected load success"))
-		goto cleanup;
-
-	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
-		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
-		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+		test_kfunc_dynptr_param__destroy(skel);
+		return false;
 	}
 
-cleanup:
 	test_kfunc_dynptr_param__destroy(skel);
+
+	return true;
 }
 
 static void verify_success(const char *prog_name, int expected_runtime_err)
 {
 	struct test_kfunc_dynptr_param *skel;
-	libbpf_print_fn_t old_print_cb;
 	struct bpf_program *prog;
 	struct bpf_link *link;
 	__u32 next_id;
@@ -103,21 +75,7 @@ static void verify_success(const char *prog_name, int expected_runtime_err)
 
 	skel->bss->pid = getpid();
 
-	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
-
-	kfunc_not_supported = false;
-
-	old_print_cb = libbpf_set_print(libbpf_print_cb);
 	err = test_kfunc_dynptr_param__load(skel);
-	libbpf_set_print(old_print_cb);
-
-	if (err < 0 && kfunc_not_supported) {
-		fprintf(stderr,
-		  "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
-		  __func__);
-		test__skip();
-		goto cleanup;
-	}
 
 	if (!ASSERT_OK(err, "test_kfunc_dynptr_param__load"))
 		goto cleanup;
@@ -147,15 +105,15 @@ void test_kfunc_dynptr_param(void)
 {
 	int i;
 
+	if (!has_pkcs7_kfunc_support())
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(kfunc_dynptr_tests); i++) {
 		if (!test__start_subtest(kfunc_dynptr_tests[i].prog_name))
 			continue;
 
-		if (kfunc_dynptr_tests[i].expected_verifier_err_msg)
-			verify_fail(kfunc_dynptr_tests[i].prog_name,
-			  kfunc_dynptr_tests[i].expected_verifier_err_msg);
-		else
-			verify_success(kfunc_dynptr_tests[i].prog_name,
-				kfunc_dynptr_tests[i].expected_runtime_err);
+		verify_success(kfunc_dynptr_tests[i].prog_name,
+			kfunc_dynptr_tests[i].expected_runtime_err);
 	}
+	RUN_TESTS(test_kfunc_dynptr_param);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index dae68de285b9..3a13e102c149 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -19,8 +19,6 @@
 
 #include "../progs/test_user_ringbuf.h"
 
-static size_t log_buf_sz = 1 << 20; /* 1 MB */
-static char obj_log_buf[1048576];
 static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
 static const long c_ringbuf_size = 1 << 12; /* 1 small page */
 static const long c_max_entries = c_ringbuf_size / c_sample_size;
@@ -663,23 +661,6 @@ cleanup:
 	user_ringbuf_success__destroy(skel);
 }
 
-static struct {
-	const char *prog_name;
-	const char *expected_err_msg;
-} failure_tests[] = {
-	/* failure cases */
-	{"user_ringbuf_callback_bad_access1", "negative offset dynptr_ptr ptr"},
-	{"user_ringbuf_callback_bad_access2", "dereference of modified dynptr_ptr ptr"},
-	{"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"},
-	{"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"},
-	{"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"},
-	{"user_ringbuf_callback_discard_dynptr", "cannot release unowned const bpf_dynptr"},
-	{"user_ringbuf_callback_submit_dynptr", "cannot release unowned const bpf_dynptr"},
-	{"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"},
-	{"user_ringbuf_callback_reinit_dynptr_mem", "Dynptr has to be an uninitialized dynptr"},
-	{"user_ringbuf_callback_reinit_dynptr_ringbuf", "Dynptr has to be an uninitialized dynptr"},
-};
-
 #define SUCCESS_TEST(_func) { _func, #_func }
 
 static struct {
@@ -700,42 +681,6 @@ static struct {
 	SUCCESS_TEST(test_user_ringbuf_blocking_reserve),
 };
 
-static void verify_fail(const char *prog_name, const char *expected_err_msg)
-{
-	LIBBPF_OPTS(bpf_object_open_opts, opts);
-	struct bpf_program *prog;
-	struct user_ringbuf_fail *skel;
-	int err;
-
-	opts.kernel_log_buf = obj_log_buf;
-	opts.kernel_log_size = log_buf_sz;
-	opts.kernel_log_level = 1;
-
-	skel = user_ringbuf_fail__open_opts(&opts);
-	if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts"))
-		goto cleanup;
-
-	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
-	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
-		goto cleanup;
-
-	bpf_program__set_autoload(prog, true);
-
-	bpf_map__set_max_entries(skel->maps.user_ringbuf, getpagesize());
-
-	err = user_ringbuf_fail__load(skel);
-	if (!ASSERT_ERR(err, "unexpected load success"))
-		goto cleanup;
-
-	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
-		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
-		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
-	}
-
-cleanup:
-	user_ringbuf_fail__destroy(skel);
-}
-
 void test_user_ringbuf(void)
 {
 	int i;
@@ -747,10 +692,5 @@ void test_user_ringbuf(void)
 		success_tests[i].test_callback();
 	}
 
-	for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
-		if (!test__start_subtest(failure_tests[i].prog_name))
-			continue;
-
-		verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
-	}
+	RUN_TESTS(user_ringbuf_fail);
 }
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
index a1369b5ebcf8..4ad7fe24966d 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -5,6 +5,7 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
 
+#include "bpf_misc.h"
 #include "cgrp_kfunc_common.h"
 
 char _license[] SEC("license") = "GPL";
@@ -28,6 +29,7 @@ static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *acquired;
@@ -45,6 +47,7 @@ int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 pointer type STRUCT cgroup must point")
 int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path;
@@ -57,6 +60,7 @@ int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path)
 }
 
 SEC("kretprobe/cgroup_destroy_locked")
+__failure __msg("reg type unsupported for arg#0 function")
 int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp)
 {
 	struct cgroup *acquired;
@@ -69,6 +73,7 @@ int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("cgrp_kfunc_acquire_trusted_walked")
 int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *acquired;
@@ -80,8 +85,8 @@ int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char
 	return 0;
 }
 
-
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *acquired;
@@ -96,6 +101,7 @@ int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Unreleased reference")
 int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *acquired;
@@ -108,6 +114,7 @@ int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *pat
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 expected pointer to map value")
 int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *kptr;
@@ -123,6 +130,7 @@ int BPF_PROG(cgrp_kfunc_get_non_kptr_param, struct cgroup *cgrp, const char *pat
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 expected pointer to map value")
 int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *kptr, *acquired;
@@ -141,6 +149,7 @@ int BPF_PROG(cgrp_kfunc_get_non_kptr_acquired, struct cgroup *cgrp, const char *
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 expected pointer to map value")
 int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *kptr;
@@ -156,6 +165,7 @@ int BPF_PROG(cgrp_kfunc_get_null, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Unreleased reference")
 int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *kptr;
@@ -175,6 +185,7 @@ int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Unreleased reference")
 int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *kptr;
@@ -194,6 +205,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket")
 int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
 {
 	struct __cgrps_kfunc_map_value *v;
@@ -209,6 +221,7 @@ int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 pointer type STRUCT cgroup must point")
 int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path)
 {
 	struct cgroup *acquired = (struct cgroup *)&path;
@@ -220,6 +233,7 @@ int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
 int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
 {
 	struct __cgrps_kfunc_map_value local, *v;
@@ -251,6 +265,7 @@ int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
+__failure __msg("release kernel function bpf_cgroup_release expects")
 int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path)
 {
 	/* Cannot release trusted cgroup pointer which was not acquired. */
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index f4a8250329b2..2fbef3cc7ad8 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -10,6 +10,7 @@
 #include <errno.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
 extern void bpf_key_put(struct bpf_key *key) __ksym;
@@ -19,6 +20,7 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
 
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 4096);
 } ringbuf SEC(".maps");
 
 struct {
@@ -33,6 +35,7 @@ int err, pid;
 char _license[] SEC("license") = "GPL";
 
 SEC("?lsm.s/bpf")
+__failure __msg("cannot pass in dynptr at an offset=-8")
 int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
 {
 	unsigned long val;
@@ -42,6 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
 }
 
 SEC("?lsm.s/bpf")
+__failure __msg("arg#0 expected pointer to stack or dynptr_ptr")
 int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
 {
 	unsigned long val;
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
index f3201dc69a60..03ee946c6bf7 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -16,6 +16,7 @@ struct sample {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+	__uint(max_entries, 4096);
 } user_ringbuf SEC(".maps");
 
 struct {
@@ -39,7 +40,8 @@ bad_access1(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to read before the pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("negative offset dynptr_ptr ptr")
 int user_ringbuf_callback_bad_access1(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0);
@@ -61,7 +63,8 @@ bad_access2(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to read past the end of the pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("dereference of modified dynptr_ptr ptr")
 int user_ringbuf_callback_bad_access2(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0);
@@ -80,7 +83,8 @@ write_forbidden(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to write to that pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'dynptr_ptr'")
 int user_ringbuf_callback_write_forbidden(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0);
@@ -99,7 +103,8 @@ null_context_write(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to write to that pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
 int user_ringbuf_callback_null_context_write(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0);
@@ -120,7 +125,8 @@ null_context_read(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to write to that pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
 int user_ringbuf_callback_null_context_read(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0);
@@ -139,7 +145,8 @@ try_discard_dynptr(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to read past the end of the pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("cannot release unowned const bpf_dynptr")
 int user_ringbuf_callback_discard_dynptr(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0);
@@ -158,7 +165,8 @@ try_submit_dynptr(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to read past the end of the pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("cannot release unowned const bpf_dynptr")
 int user_ringbuf_callback_submit_dynptr(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0);
@@ -175,7 +183,8 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context)
 /* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
  * not be able to write to that pointer.
  */
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("At callback return the register R0 has value")
 int user_ringbuf_callback_invalid_return(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0);
@@ -197,14 +206,16 @@ try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context)
 	return 0;
 }
 
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("Dynptr has to be an uninitialized dynptr")
 int user_ringbuf_callback_reinit_dynptr_mem(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0);
 	return 0;
 }
 
-SEC("?raw_tp/")
+SEC("?raw_tp")
+__failure __msg("Dynptr has to be an uninitialized dynptr")
 int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx)
 {
 	bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0);
-- 
cgit v1.2.3


From 50a7cedb150a628b54aa7f8ce1e922a72c773273 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 13 Feb 2023 21:13:32 -0800
Subject: selftests/bpf: Clean up dynptr prog_tests

Clean up prog_tests/dynptr.c by removing the unneeded "expected_err_msg"
in the dynptr_tests struct, which is a remnant from converting the fail
tests cases to use the generic verification tester.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://lore.kernel.org/r/20230214051332.4007131-2-joannelkoong@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/dynptr.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 7faaf6d9e0d4..b99264ec0d9c 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -5,14 +5,10 @@
 #include "dynptr_fail.skel.h"
 #include "dynptr_success.skel.h"
 
-static struct {
-	const char *prog_name;
-	const char *expected_err_msg;
-} dynptr_tests[] = {
-	/* success cases */
-	{"test_read_write", NULL},
-	{"test_data_slice", NULL},
-	{"test_ringbuf", NULL},
+static const char * const success_tests[] = {
+	"test_read_write",
+	"test_data_slice",
+	"test_ringbuf",
 };
 
 static void verify_success(const char *prog_name)
@@ -53,11 +49,11 @@ void test_dynptr(void)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(dynptr_tests); i++) {
-		if (!test__start_subtest(dynptr_tests[i].prog_name))
+	for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+		if (!test__start_subtest(success_tests[i]))
 			continue;
 
-		verify_success(dynptr_tests[i].prog_name);
+		verify_success(success_tests[i]);
 	}
 
 	RUN_TESTS(dynptr_fail);
-- 
cgit v1.2.3


From 5e53e5c7edc6d69b8cb48b3b370cfe531e4b4132 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn@rivosinc.com>
Date: Tue, 14 Feb 2023 17:12:53 +0100
Subject: selftests/bpf: Cross-compile bpftool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the BPF selftests are cross-compiled, only the a host version of
bpftool is built. This version of bpftool is used on the host-side to
generate various intermediates, e.g., skeletons.

The test runners are also using bpftool, so the Makefile will symlink
bpftool from the selftest/bpf root, where the test runners will look
the tool:

  | $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/bootstrap/bpftool \
  |    $(OUTPUT)/$(if $2,$2/)bpftool

There are two problems for cross-compilation builds:

 1. There is no native (cross-compilation target) of bpftool
 2. The bootstrap/bpftool is never cross-compiled (by design)

Make sure that a native/cross-compiled version of bpftool is built,
and if CROSS_COMPILE is set, symlink the native/non-bootstrap version.

Acked-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
Link: https://lore.kernel.org/r/20230214161253.183458-1-bjorn@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f7771592a920..521933bc15fe 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -156,8 +156,9 @@ $(notdir $(TEST_GEN_PROGS)						\
 	 $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
 
 # sort removes libbpf duplicates when not cross-building
-MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf	       \
-	       $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids      \
+MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf	\
+	       $(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool		\
+	       $(HOST_BUILD_DIR)/resolve_btfids				\
 	       $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
 $(MAKE_DIRS):
 	$(call msg,MKDIR,,$@)
@@ -207,6 +208,14 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_tes
 	$(Q)cp bpf_testmod/bpf_testmod.ko $@
 
 DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
+ifneq ($(CROSS_COMPILE),)
+CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
+TRUNNER_BPFTOOL := $(CROSS_BPFTOOL)
+USE_BOOTSTRAP := ""
+else
+TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
+USE_BOOTSTRAP := "bootstrap/"
+endif
 
 $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	       \
@@ -218,7 +227,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
 		    EXTRA_LDFLAGS='$(SAN_LDFLAGS)' &&			       \
 		    cp $(RUNQSLOWER_OUTPUT)runqslower $@
 
-TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
 
@@ -256,6 +265,18 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/			       \
 		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin
 
+ifneq ($(CROSS_COMPILE),)
+$(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)	\
+		    $(BPFOBJ) | $(BUILD_DIR)/bpftool
+	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)				\
+		    ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE)			\
+		    EXTRA_CFLAGS='-g -O0'					\
+		    OUTPUT=$(BUILD_DIR)/bpftool/				\
+		    LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/				\
+		    LIBBPF_DESTDIR=$(SCRATCH_DIR)/				\
+		    prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
+endif
+
 all: docs
 
 docs:
@@ -521,11 +542,12 @@ endif
 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ)		\
 			     $(RESOLVE_BTFIDS)				\
+			     $(TRUNNER_BPFTOOL)				\
 			     | $(TRUNNER_BINARY)-extras
 	$$(call msg,BINARY,,$$@)
 	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
 	$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
-	$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/bootstrap/bpftool \
+	$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
 		   $(OUTPUT)/$(if $2,$2/)bpftool
 
 endef
-- 
cgit v1.2.3


From 62d101d5f422cde39b269f7eb4cbbe2f1e26f9d4 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 14 Feb 2023 15:50:51 -0800
Subject: selftests/bpf: Fix map_kptr test.

The compiler is optimizing out majority of unref_ptr read/writes, so the test
wasn't testing much. For example, one could delete '__kptr' tag from
'struct prog_test_ref_kfunc __kptr *unref_ptr;' and the test would still "pass".

Convert it to volatile stores. Confirmed by comparing bpf asm before/after.

Fixes: 2cbc469a6fc3 ("selftests/bpf: Add C tests for kptr")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20230214235051.22938-1-alexei.starovoitov@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/progs/map_kptr.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index eb8217803493..228ec45365a8 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -62,21 +62,23 @@ extern struct prog_test_ref_kfunc *
 bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
 extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
 
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+
 static void test_kptr_unref(struct map_value *v)
 {
 	struct prog_test_ref_kfunc *p;
 
 	p = v->unref_ptr;
 	/* store untrusted_ptr_or_null_ */
-	v->unref_ptr = p;
+	WRITE_ONCE(v->unref_ptr, p);
 	if (!p)
 		return;
 	if (p->a + p->b > 100)
 		return;
 	/* store untrusted_ptr_ */
-	v->unref_ptr = p;
+	WRITE_ONCE(v->unref_ptr, p);
 	/* store NULL */
-	v->unref_ptr = NULL;
+	WRITE_ONCE(v->unref_ptr, NULL);
 }
 
 static void test_kptr_ref(struct map_value *v)
@@ -85,7 +87,7 @@ static void test_kptr_ref(struct map_value *v)
 
 	p = v->ref_ptr;
 	/* store ptr_or_null_ */
-	v->unref_ptr = p;
+	WRITE_ONCE(v->unref_ptr, p);
 	if (!p)
 		return;
 	if (p->a + p->b > 100)
@@ -99,7 +101,7 @@ static void test_kptr_ref(struct map_value *v)
 		return;
 	}
 	/* store ptr_ */
-	v->unref_ptr = p;
+	WRITE_ONCE(v->unref_ptr, p);
 	bpf_kfunc_call_test_release(p);
 
 	p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
-- 
cgit v1.2.3


From ecdf985d7615356b78241fdb159c091830ed0380 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Wed, 15 Feb 2023 01:20:27 +0200
Subject: bpf: track immediate values written to stack by BPF_ST instruction

For aligned stack writes using BPF_ST instruction track stored values
in a same way BPF_STX is handled, e.g. make sure that the following
commands produce similar verifier knowledge:

  fp[-8] = 42;             r1 = 42;
                       fp[-8] = r1;

This covers two cases:
 - non-null values written to stack are stored as spill of fake
   registers;
 - null values written to stack are stored as STACK_ZERO marks.

Previously both cases above used STACK_MISC marks instead.

Some verifier test cases relied on the old logic to obtain STACK_MISC
marks for some stack values. These test cases are updated in the same
commit to avoid failures during bisect.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20230214232030.1502829-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                              |  18 +++-
 .../bpf/verifier/bounds_mix_sign_unsign.c          | 110 ++++++++++++---------
 2 files changed, 80 insertions(+), 48 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 21e08c111702..c28afae60874 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3473,6 +3473,11 @@ static void save_register_state(struct bpf_func_state *state,
 		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
 }
 
+static bool is_bpf_st_mem(struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
+}
+
 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
@@ -3484,8 +3489,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 {
 	struct bpf_func_state *cur; /* state of the current function */
 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
-	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
+	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
 	struct bpf_reg_state *reg = NULL;
+	u32 dst_reg = insn->dst_reg;
 
 	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
 	if (err)
@@ -3538,6 +3544,13 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 				return err;
 		}
 		save_register_state(state, spi, reg, size);
+	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
+		   insn->imm != 0 && env->bpf_capable) {
+		struct bpf_reg_state fake_reg = {};
+
+		__mark_reg_known(&fake_reg, (u32)insn->imm);
+		fake_reg.type = SCALAR_VALUE;
+		save_register_state(state, spi, &fake_reg, size);
 	} else if (reg && is_spillable_regtype(reg->type)) {
 		/* register containing pointer is being spilled into stack */
 		if (size != BPF_REG_SIZE) {
@@ -3572,7 +3585,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 
 		/* when we zero initialize stack slots mark them as such */
-		if (reg && register_is_null(reg)) {
+		if ((reg && register_is_null(reg)) ||
+		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
 			/* backtracking doesn't work for STACK_ZERO yet. */
 			err = mark_chain_precision(env, value_regno);
 			if (err)
diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
index c2aa6f26738b..bf82b923c5fe 100644
--- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
+++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
@@ -1,13 +1,14 @@
 {
 	"bounds checks mixing signed and unsigned, positive bounds",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, 2),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
@@ -17,20 +18,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
@@ -40,20 +42,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 2",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
@@ -65,20 +68,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 3",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
@@ -89,20 +93,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 4",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
@@ -112,19 +117,20 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.result = ACCEPT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 5",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
@@ -135,17 +141,20 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 6",
 	.insns = {
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_6, -1),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
@@ -163,13 +172,14 @@
 {
 	"bounds checks mixing signed and unsigned, variant 7",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
@@ -179,19 +189,20 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.result = ACCEPT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 8",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
@@ -203,20 +214,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 9",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
@@ -228,19 +240,20 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.result = ACCEPT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 10",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
@@ -252,20 +265,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 11",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
@@ -278,20 +292,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 12",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -6),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
@@ -303,20 +318,21 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 13",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, 2),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
@@ -331,7 +347,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
@@ -340,13 +356,14 @@
 	.insns = {
 	BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
 		    offsetof(struct __sk_buff, mark)),
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_MOV64_IMM(BPF_REG_8, 2),
@@ -360,20 +377,21 @@
 	BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
 	BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 	},
-	.fixup_map_hash_8b = { 4 },
+	.fixup_map_hash_8b = { 6 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
 {
 	"bounds checks mixing signed and unsigned, variant 15",
 	.insns = {
+	BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_MOV64_IMM(BPF_REG_2, -6),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
@@ -387,7 +405,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_8b = { 3 },
+	.fixup_map_hash_8b = { 5 },
 	.errstr = "unbounded min value",
 	.result = REJECT,
 },
-- 
cgit v1.2.3


From 1a24af65bb5fed673a9377e794ee3cf416fec64d Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Wed, 15 Feb 2023 01:20:28 +0200
Subject: selftests/bpf: check if verifier tracks constants spilled by
 BPF_ST_MEM

Check that verifier tracks the value of 'imm' spilled to stack by
BPF_ST_MEM instruction. Cover the following cases:
- write of non-zero constant to stack;
- write of a zero constant to stack.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20230214232030.1502829-3-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/bpf_st_mem.c | 37 +++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/bpf_st_mem.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
new file mode 100644
index 000000000000..932903f9e585
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -0,0 +1,37 @@
+{
+	"BPF_ST_MEM stack imm non-zero",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 42),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -42),
+	/* if value is tracked correctly R0 is zero */
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	/* Use prog type that requires return value in range [0, 1] */
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+	.runs = -1,
+},
+{
+	"BPF_ST_MEM stack imm zero",
+	.insns = {
+	/* mark stack 0000 0000 */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	/* read and sum a few bytes */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -8),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+	BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -4),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+	BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -1),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+	/* if value is tracked correctly R0 is zero */
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	/* Use prog type that requires return value in range [0, 1] */
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+	.runs = -1,
+},
-- 
cgit v1.2.3


From 2a33c5a25ef4fb574e6744fe7636956b124ad78f Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Wed, 15 Feb 2023 01:20:30 +0200
Subject: selftests/bpf: check if BPF_ST with variable offset preserves
 STACK_ZERO

A test case to verify that variable offset BPF_ST instruction
preserves STACK_ZERO marks when writes zeros, e.g. in the following
situation:

  *(u64*)(r10 - 8) = 0   ; STACK_ZERO marks for fp[-8]
  r0 = random(-7, -1)    ; some random number in range of [-7, -1]
  r0 += r10              ; r0 is now variable offset pointer to stack
  *(u8*)(r0) = 0         ; BPF_ST writing zero, STACK_ZERO mark for
                         ; fp[-8] should be preserved.

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20230214232030.1502829-5-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/bpf_st_mem.c | 30 +++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index 932903f9e585..3af2501082b2 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -35,3 +35,33 @@
 	.expected_attach_type = BPF_SK_LOOKUP,
 	.runs = -1,
 },
+{
+	"BPF_ST_MEM stack imm zero, variable offset",
+	.insns = {
+	/* set fp[-16], fp[-24] to zeros */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+	/* r0 = random value in range [-32, -15] */
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_0, 16, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 32),
+	/* fp[r0] = 0, make a variable offset write of zero,
+	 *             this should preserve zero marks on stack.
+	 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_10),
+	BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+	/* r0 = fp[-20], if variable offset write was tracked correctly
+	 *               r0 would be a known zero.
+	 */
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_10, -20),
+	/* Would fail return code verification if r0 range is not tracked correctly. */
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	/* Use prog type that requires return value in range [0, 1] */
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+	.runs = -1,
+},
-- 
cgit v1.2.3


From f88da2d46cc9a19b0c233285339659cae36c5d9a Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Wed, 15 Feb 2023 16:21:32 +0800
Subject: selftests/bpf: Add test case for element reuse in htab map

The reinitialization of spin-lock in map value after immediate reuse may
corrupt lookup with BPF_F_LOCK flag and result in hard lock-up, so add
one test case to demonstrate the problem.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20230215082132.3856544-3-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/htab_reuse.c  | 101 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/htab_reuse.c     |  19 ++++
 2 files changed, 120 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/htab_reuse.c
 create mode 100644 tools/testing/selftests/bpf/progs/htab_reuse.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
new file mode 100644
index 000000000000..a742dd994d60
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdbool.h>
+#include <test_progs.h>
+#include "htab_reuse.skel.h"
+
+struct htab_op_ctx {
+	int fd;
+	int loop;
+	bool stop;
+};
+
+struct htab_val {
+	unsigned int lock;
+	unsigned int data;
+};
+
+static void *htab_lookup_fn(void *arg)
+{
+	struct htab_op_ctx *ctx = arg;
+	int i = 0;
+
+	while (i++ < ctx->loop && !ctx->stop) {
+		struct htab_val value;
+		unsigned int key;
+
+		/* Use BPF_F_LOCK to use spin-lock in map value. */
+		key = 7;
+		bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK);
+	}
+
+	return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+	struct htab_op_ctx *ctx = arg;
+	int i = 0;
+
+	while (i++ < ctx->loop && !ctx->stop) {
+		struct htab_val value;
+		unsigned int key;
+
+		key = 7;
+		value.lock = 0;
+		value.data = key;
+		bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+		bpf_map_delete_elem(ctx->fd, &key);
+
+		key = 24;
+		value.lock = 0;
+		value.data = key;
+		bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+		bpf_map_delete_elem(ctx->fd, &key);
+	}
+
+	return NULL;
+}
+
+void test_htab_reuse(void)
+{
+	unsigned int i, wr_nr = 1, rd_nr = 4;
+	pthread_t tids[wr_nr + rd_nr];
+	struct htab_reuse *skel;
+	struct htab_op_ctx ctx;
+	int err;
+
+	skel = htab_reuse__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load"))
+		return;
+
+	ctx.fd = bpf_map__fd(skel->maps.htab);
+	ctx.loop = 500;
+	ctx.stop = false;
+
+	memset(tids, 0, sizeof(tids));
+	for (i = 0; i < wr_nr; i++) {
+		err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+		if (!ASSERT_OK(err, "pthread_create")) {
+			ctx.stop = true;
+			goto reap;
+		}
+	}
+	for (i = 0; i < rd_nr; i++) {
+		err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+		if (!ASSERT_OK(err, "pthread_create")) {
+			ctx.stop = true;
+			goto reap;
+		}
+	}
+
+reap:
+	for (i = 0; i < wr_nr + rd_nr; i++) {
+		if (!tids[i])
+			continue;
+		pthread_join(tids[i], NULL);
+	}
+	htab_reuse__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/htab_reuse.c b/tools/testing/selftests/bpf/progs/htab_reuse.c
new file mode 100644
index 000000000000..7f7368cb3095
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_reuse.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct htab_val {
+	struct bpf_spin_lock lock;
+	unsigned int data;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 64);
+	__type(key, unsigned int);
+	__type(value, struct htab_val);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} htab SEC(".maps");
-- 
cgit v1.2.3


From 4db98ab445c58bd26c303ef7a10ccd8f049acc22 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:13 +0000
Subject: selftest/bpf/benchs: Fix a typo in bpf_hashmap_full_update

To call the bpf_hashmap_full_update benchmark, one should say:

    bench bpf-hashmap-ful-update

The patch adds a missing 'l' to the benchmark name.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-2-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c      | 2 +-
 tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
index cec51e0ff4b8..44706acf632a 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -85,7 +85,7 @@ void hashmap_report_final(struct bench_res res[], int res_cnt)
 }
 
 const struct bench bench_bpf_hashmap_full_update = {
-	.name = "bpf-hashmap-ful-update",
+	.name = "bpf-hashmap-full-update",
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
index 1e2de838f9fa..cd2efd3fdef3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
@@ -6,6 +6,6 @@ source ./benchs/run_common.sh
 set -eufo pipefail
 
 nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1`
-summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update)
+summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update)
 printf "$summary"
 printf "\n"
-- 
cgit v1.2.3


From 2f1c59637fb17dbb2a725c3bd48e4d9d3809df89 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:14 +0000
Subject: selftest/bpf/benchs: Make a function static in
 bpf_hashmap_full_update

The hashmap_report_final callback function defined in the
benchs/bench_bpf_hashmap_full_update.c file should be static.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-3-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
index 44706acf632a..67f76415a362 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -68,7 +68,7 @@ static void setup(void)
 		bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
 }
 
-void hashmap_report_final(struct bench_res res[], int res_cnt)
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
 	int i;
-- 
cgit v1.2.3


From 22ff7aeaa9e3d0533df613da3500db1ecf452253 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:15 +0000
Subject: selftest/bpf/benchs: Enhance argp parsing

To parse command line the bench utility uses the argp_parse() function. This
function takes as an argument a parent 'struct argp' structure which defines
common command line options and an array of children 'struct argp' structures
which defines additional command line options for particular benchmarks. This
implementation doesn't allow benchmarks to share option names, e.g., if two
benchmarks want to use, say, the --option option, then only one of them will
succeed (the first one encountered in the array).  This will be convenient if
same option names could be used in different benchmarks (with the same
semantics, e.g., --nr_loops=N).

Fix this by calling the argp_parse() function twice. The first call is the same
as it was before, with all children argps, and helps to find the benchmark name
and to print a combined help message if anything is wrong.  Given the name, we
can call the argp_parse the second time, but now the children array points only
to a correct benchmark thus always calling the correct parsers. (If there's no
a specific list of arguments, then only one call to argp_parse will be done.)

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-4-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/bench.c                | 44 +++++++++++++++++-----
 tools/testing/selftests/bpf/bench.h                |  1 +
 .../selftests/bpf/benchs/bench_bloom_filter_map.c  |  5 +++
 .../testing/selftests/bpf/benchs/bench_bpf_loop.c  |  1 +
 .../selftests/bpf/benchs/bench_local_storage.c     |  3 ++
 .../benchs/bench_local_storage_rcu_tasks_trace.c   |  1 +
 .../testing/selftests/bpf/benchs/bench_ringbufs.c  |  4 ++
 tools/testing/selftests/bpf/benchs/bench_strncmp.c |  2 +
 8 files changed, 51 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index c1f20a147462..12c3b3ab84aa 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -287,10 +287,11 @@ static const struct argp_child bench_parsers[] = {
 	{},
 };
 
+/* Make pos_args global, so that we can run argp_parse twice, if necessary */
+static int pos_args;
+
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
-	static int pos_args;
-
 	switch (key) {
 	case 'v':
 		env.verbose = true;
@@ -359,7 +360,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
-static void parse_cmdline_args(int argc, char **argv)
+static void parse_cmdline_args_init(int argc, char **argv)
 {
 	static const struct argp argp = {
 		.options = opts,
@@ -369,9 +370,25 @@ static void parse_cmdline_args(int argc, char **argv)
 	};
 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
 		exit(1);
-	if (!env.list && !env.bench_name) {
-		argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
-		exit(1);
+}
+
+static void parse_cmdline_args_final(int argc, char **argv)
+{
+	struct argp_child bench_parsers[2] = {};
+	const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+		.children = bench_parsers,
+	};
+
+	/* Parse arguments the second time with the correct set of parsers */
+	if (bench->argp) {
+		bench_parsers[0].argp = bench->argp;
+		bench_parsers[0].header = bench->name;
+		pos_args = 0;
+		if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+			exit(1);
 	}
 }
 
@@ -531,15 +548,14 @@ static const struct bench *benchs[] = {
 	&bench_local_storage_tasks_trace,
 };
 
-static void setup_benchmark()
+static void find_benchmark(void)
 {
-	int i, err;
+	int i;
 
 	if (!env.bench_name) {
 		fprintf(stderr, "benchmark name is not specified\n");
 		exit(1);
 	}
-
 	for (i = 0; i < ARRAY_SIZE(benchs); i++) {
 		if (strcmp(benchs[i]->name, env.bench_name) == 0) {
 			bench = benchs[i];
@@ -550,6 +566,11 @@ static void setup_benchmark()
 		fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
 		exit(1);
 	}
+}
+
+static void setup_benchmark(void)
+{
+	int i, err;
 
 	printf("Setting up benchmark '%s'...\n", bench->name);
 
@@ -621,7 +642,7 @@ static void collect_measurements(long delta_ns) {
 
 int main(int argc, char **argv)
 {
-	parse_cmdline_args(argc, argv);
+	parse_cmdline_args_init(argc, argv);
 
 	if (env.list) {
 		int i;
@@ -633,6 +654,9 @@ int main(int argc, char **argv)
 		return 0;
 	}
 
+	find_benchmark();
+	parse_cmdline_args_final(argc, argv);
+
 	setup_benchmark();
 
 	setup_timer();
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index d748255877e2..3c8afa0131a3 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -47,6 +47,7 @@ struct bench_res {
 
 struct bench {
 	const char *name;
+	const struct argp *argp;
 	void (*validate)(void);
 	void (*setup)(void);
 	void *(*producer_thread)(void *ctx);
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
index 5bcb8a8cdeb2..7c8ccc108313 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
@@ -428,6 +428,7 @@ static void *consumer(void *input)
 
 const struct bench bench_bloom_lookup = {
 	.name = "bloom-lookup",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = bloom_lookup_setup,
 	.producer_thread = producer,
@@ -439,6 +440,7 @@ const struct bench bench_bloom_lookup = {
 
 const struct bench bench_bloom_update = {
 	.name = "bloom-update",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = bloom_update_setup,
 	.producer_thread = producer,
@@ -450,6 +452,7 @@ const struct bench bench_bloom_update = {
 
 const struct bench bench_bloom_false_positive = {
 	.name = "bloom-false-positive",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = false_positive_setup,
 	.producer_thread = producer,
@@ -461,6 +464,7 @@ const struct bench bench_bloom_false_positive = {
 
 const struct bench bench_hashmap_without_bloom = {
 	.name = "hashmap-without-bloom",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = hashmap_no_bloom_setup,
 	.producer_thread = producer,
@@ -472,6 +476,7 @@ const struct bench bench_hashmap_without_bloom = {
 
 const struct bench bench_hashmap_with_bloom = {
 	.name = "hashmap-with-bloom",
+	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = hashmap_with_bloom_setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
index d0a6572bfab6..d8a0394e10b1 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -95,6 +95,7 @@ static void setup(void)
 
 const struct bench bench_bpf_loop = {
 	.name = "bpf-loop",
+	.argp = &bench_bpf_loop_argp,
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
index 5a378c84e81f..d4b2817306d4 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -255,6 +255,7 @@ static void *producer(void *input)
  */
 const struct bench bench_local_storage_cache_seq_get = {
 	.name = "local-storage-cache-seq-get",
+	.argp = &bench_local_storage_argp,
 	.validate = validate,
 	.setup = local_storage_cache_get_setup,
 	.producer_thread = producer,
@@ -266,6 +267,7 @@ const struct bench bench_local_storage_cache_seq_get = {
 
 const struct bench bench_local_storage_cache_interleaved_get = {
 	.name = "local-storage-cache-int-get",
+	.argp = &bench_local_storage_argp,
 	.validate = validate,
 	.setup = local_storage_cache_get_interleaved_setup,
 	.producer_thread = producer,
@@ -277,6 +279,7 @@ const struct bench bench_local_storage_cache_interleaved_get = {
 
 const struct bench bench_local_storage_cache_hashmap_control = {
 	.name = "local-storage-cache-hashmap-control",
+	.argp = &bench_local_storage_argp,
 	.validate = validate,
 	.setup = hashmap_setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
index 43f109d93130..4f9401ecf09c 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -271,6 +271,7 @@ static void report_final(struct bench_res res[], int res_cnt)
  */
 const struct bench bench_local_storage_tasks_trace = {
 	.name = "local-storage-tasks-trace",
+	.argp = &bench_local_storage_rcu_tasks_trace_argp,
 	.validate = validate,
 	.setup = local_storage_tasks_trace_setup,
 	.producer_thread = producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index c2554f9695ff..fc91fdac4faa 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -518,6 +518,7 @@ static void *perfbuf_custom_consumer(void *input)
 
 const struct bench bench_rb_libbpf = {
 	.name = "rb-libbpf",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = ringbuf_libbpf_setup,
 	.producer_thread = bufs_sample_producer,
@@ -529,6 +530,7 @@ const struct bench bench_rb_libbpf = {
 
 const struct bench bench_rb_custom = {
 	.name = "rb-custom",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = ringbuf_custom_setup,
 	.producer_thread = bufs_sample_producer,
@@ -540,6 +542,7 @@ const struct bench bench_rb_custom = {
 
 const struct bench bench_pb_libbpf = {
 	.name = "pb-libbpf",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = perfbuf_libbpf_setup,
 	.producer_thread = bufs_sample_producer,
@@ -551,6 +554,7 @@ const struct bench bench_pb_libbpf = {
 
 const struct bench bench_pb_custom = {
 	.name = "pb-custom",
+	.argp = &bench_ringbufs_argp,
 	.validate = bufs_validate,
 	.setup = perfbuf_libbpf_setup,
 	.producer_thread = bufs_sample_producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
index 494b591c0289..d3fad2ba6916 100644
--- a/tools/testing/selftests/bpf/benchs/bench_strncmp.c
+++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
@@ -140,6 +140,7 @@ static void strncmp_measure(struct bench_res *res)
 
 const struct bench bench_strncmp_no_helper = {
 	.name = "strncmp-no-helper",
+	.argp = &bench_strncmp_argp,
 	.validate = strncmp_validate,
 	.setup = strncmp_no_helper_setup,
 	.producer_thread = strncmp_producer,
@@ -151,6 +152,7 @@ const struct bench bench_strncmp_no_helper = {
 
 const struct bench bench_strncmp_helper = {
 	.name = "strncmp-helper",
+	.argp = &bench_strncmp_argp,
 	.validate = strncmp_validate,
 	.setup = strncmp_helper_setup,
 	.producer_thread = strncmp_producer,
-- 
cgit v1.2.3


From 9644546260eac49348b2c0694b01bdf72c627194 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:16 +0000
Subject: selftest/bpf/benchs: Remove an unused header

The benchs/bench_bpf_hashmap_full_update.c doesn't set a custom argp,
so it shouldn't include the <argp.h> header.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-5-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
index 67f76415a362..75abe8137b6c 100644
--- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2022 Bytedance */
 
-#include <argp.h>
 #include "bench.h"
 #include "bpf_hashmap_full_update_bench.skel.h"
 #include "bpf_util.h"
-- 
cgit v1.2.3


From 90c22503cd8910c54a8cd4bfe5bb6873d9ba8e0b Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:17 +0000
Subject: selftest/bpf/benchs: Make quiet option common

The "local-storage-tasks-trace" benchmark has a `--quiet` option. Move it to
the list of common options, so that the main code and other benchmarks can use
(new) env.quiet variable. Patch the run_bench_local_storage_rcu_tasks_trace.sh
helper script accordingly.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-6-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/bench.c                       |  5 +++++
 tools/testing/selftests/bpf/bench.h                       |  1 +
 .../bpf/benchs/bench_local_storage_rcu_tasks_trace.c      | 15 +--------------
 .../bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh |  2 +-
 4 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 12c3b3ab84aa..23c24c346130 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -16,6 +16,7 @@ struct env env = {
 	.warmup_sec = 1,
 	.duration_sec = 5,
 	.affinity = false,
+	.quiet = false,
 	.consumer_cnt = 1,
 	.producer_cnt = 1,
 };
@@ -262,6 +263,7 @@ static const struct argp_option opts[] = {
 	{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
 	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
 	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+	{ "quiet", 'q', NULL, 0, "Be more quiet"},
 	{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
 	  "Set of CPUs for producer threads; implies --affinity"},
 	{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -330,6 +332,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 'a':
 		env.affinity = true;
 		break;
+	case 'q':
+		env.quiet = true;
+		break;
 	case ARG_PROD_AFFINITY_SET:
 		env.affinity = true;
 		if (parse_num_list(arg, &env.prod_cpus.cpus,
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 3c8afa0131a3..402729c6a3ac 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -24,6 +24,7 @@ struct env {
 	bool verbose;
 	bool list;
 	bool affinity;
+	bool quiet;
 	int consumer_cnt;
 	int producer_cnt;
 	struct cpu_set prod_cpus;
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
index 4f9401ecf09c..d5eb5587f2aa 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -12,17 +12,14 @@
 static struct {
 	__u32 nr_procs;
 	__u32 kthread_pid;
-	bool quiet;
 } args = {
 	.nr_procs = 1000,
 	.kthread_pid = 0,
-	.quiet = false,
 };
 
 enum {
 	ARG_NR_PROCS = 7000,
 	ARG_KTHREAD_PID = 7001,
-	ARG_QUIET = 7002,
 };
 
 static const struct argp_option opts[] = {
@@ -30,8 +27,6 @@ static const struct argp_option opts[] = {
 		"Set number of user processes to spin up"},
 	{ "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
 		"Pid of rcu_tasks_trace kthread for ticks tracking"},
-	{ "quiet", ARG_QUIET, "{0,1}", 0,
-		"If true, don't report progress"},
 	{},
 };
 
@@ -56,14 +51,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 		}
 		args.kthread_pid = ret;
 		break;
-	case ARG_QUIET:
-		ret = strtol(arg, NULL, 10);
-		if (ret < 0 || ret > 1) {
-			fprintf(stderr, "invalid quiet %ld\n", ret);
-			argp_usage(state);
-		}
-		args.quiet = ret;
-		break;
 break;
 	default:
 		return ARGP_ERR_UNKNOWN;
@@ -230,7 +217,7 @@ static void report_progress(int iter, struct bench_res *res, long delta_ns)
 		exit(1);
 	}
 
-	if (args.quiet)
+	if (env.quiet)
 		return;
 
 	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
index 5dac1f02892c..3e8a969f2096 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
@@ -8,4 +8,4 @@ if [ -z $kthread_pid ]; then
 	exit 1
 fi
 
-./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace
+./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace
-- 
cgit v1.2.3


From a237dda05e9101404a634ac53ee65c8f8c8fce58 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:18 +0000
Subject: selftest/bpf/benchs: Print less if the quiet option is set

The bench utility will print

    Setting up benchmark '<bench-name>'...
    Benchmark '<bench-name>' started.

on startup to stdout. Suppress this output if --quiet option if given. This
makes it simpler to parse benchmark output by a script.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-7-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/bench.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 23c24c346130..767ca679ee67 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -577,7 +577,8 @@ static void setup_benchmark(void)
 {
 	int i, err;
 
-	printf("Setting up benchmark '%s'...\n", bench->name);
+	if (!env.quiet)
+		printf("Setting up benchmark '%s'...\n", bench->name);
 
 	state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
 	state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
@@ -623,7 +624,8 @@ static void setup_benchmark(void)
 					    next_cpu(&env.prod_cpus));
 	}
 
-	printf("Benchmark '%s' started.\n", bench->name);
+	if (!env.quiet)
+		printf("Benchmark '%s' started.\n", bench->name);
 }
 
 static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
-- 
cgit v1.2.3


From f371f2dc53d107af25171f29c852a3908ee0afb6 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <aspsk@isovalent.com>
Date: Mon, 13 Feb 2023 09:15:19 +0000
Subject: selftest/bpf/benchs: Add benchmark for hashmap lookups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new benchmark which measures hashmap lookup operations speed.  A user can
control the following parameters of the benchmark:

    * key_size (max 1024): the key size to use
    * max_entries: the hashmap max entries
    * nr_entries: the number of entries to insert/lookup
    * nr_loops: the number of loops for the benchmark
    * map_flags The hashmap flags passed to BPF_MAP_CREATE

The BPF program performing the benchmarks calls two nested bpf_loop:

    bpf_loop(nr_loops/nr_entries)
            bpf_loop(nr_entries)
                     bpf_map_lookup()

So the nr_loops determines the number of actual map lookups. All lookups are
successful.

Example (the output is generated on a AMD Ryzen 9 3950X machine):

    for nr_entries in `seq 4096 4096 65536`; do echo -n "$((nr_entries*100/65536))% full: "; sudo ./bench -d2 -a bpf-hashmap-lookup --key_size=4 --nr_entries=$nr_entries --max_entries=65536 --nr_loops=1000000 --map_flags=0x40 | grep cpu; done
    6% full: cpu01: lookup 50.739M ± 0.018M events/sec (approximated from 32 samples of ~19ms)
    12% full: cpu01: lookup 47.751M ± 0.015M events/sec (approximated from 32 samples of ~20ms)
    18% full: cpu01: lookup 45.153M ± 0.013M events/sec (approximated from 32 samples of ~22ms)
    25% full: cpu01: lookup 43.826M ± 0.014M events/sec (approximated from 32 samples of ~22ms)
    31% full: cpu01: lookup 41.971M ± 0.012M events/sec (approximated from 32 samples of ~23ms)
    37% full: cpu01: lookup 41.034M ± 0.015M events/sec (approximated from 32 samples of ~24ms)
    43% full: cpu01: lookup 39.946M ± 0.012M events/sec (approximated from 32 samples of ~25ms)
    50% full: cpu01: lookup 38.256M ± 0.014M events/sec (approximated from 32 samples of ~26ms)
    56% full: cpu01: lookup 36.580M ± 0.018M events/sec (approximated from 32 samples of ~27ms)
    62% full: cpu01: lookup 36.252M ± 0.012M events/sec (approximated from 32 samples of ~27ms)
    68% full: cpu01: lookup 35.200M ± 0.012M events/sec (approximated from 32 samples of ~28ms)
    75% full: cpu01: lookup 34.061M ± 0.009M events/sec (approximated from 32 samples of ~29ms)
    81% full: cpu01: lookup 34.374M ± 0.010M events/sec (approximated from 32 samples of ~29ms)
    87% full: cpu01: lookup 33.244M ± 0.011M events/sec (approximated from 32 samples of ~30ms)
    93% full: cpu01: lookup 32.182M ± 0.013M events/sec (approximated from 32 samples of ~31ms)
    100% full: cpu01: lookup 31.497M ± 0.016M events/sec (approximated from 32 samples of ~31ms)

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230213091519.1202813-8-aspsk@isovalent.com
---
 tools/testing/selftests/bpf/Makefile               |   5 +-
 tools/testing/selftests/bpf/bench.c                |   4 +
 .../bpf/benchs/bench_bpf_hashmap_lookup.c          | 283 +++++++++++++++++++++
 .../selftests/bpf/progs/bpf_hashmap_lookup.c       |  63 +++++
 4 files changed, 354 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 521933bc15fe..b677dcd0b77a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -638,6 +638,7 @@ $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
 $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
 $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
+$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -652,7 +653,9 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 		 $(OUTPUT)/bench_strncmp.o \
 		 $(OUTPUT)/bench_bpf_hashmap_full_update.o \
 		 $(OUTPUT)/bench_local_storage.o \
-		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
+		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
+		 $(OUTPUT)/bench_bpf_hashmap_lookup.o \
+		 #
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 767ca679ee67..0b2a53bb8460 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -277,6 +277,7 @@ extern struct argp bench_bpf_loop_argp;
 extern struct argp bench_local_storage_argp;
 extern struct argp bench_local_storage_rcu_tasks_trace_argp;
 extern struct argp bench_strncmp_argp;
+extern struct argp bench_hashmap_lookup_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -286,6 +287,7 @@ static const struct argp_child bench_parsers[] = {
 	{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
 	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
 		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
+	{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
 	{},
 };
 
@@ -512,6 +514,7 @@ extern const struct bench bench_local_storage_cache_seq_get;
 extern const struct bench bench_local_storage_cache_interleaved_get;
 extern const struct bench bench_local_storage_cache_hashmap_control;
 extern const struct bench bench_local_storage_tasks_trace;
+extern const struct bench bench_bpf_hashmap_lookup;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -551,6 +554,7 @@ static const struct bench *benchs[] = {
 	&bench_local_storage_cache_interleaved_get,
 	&bench_local_storage_cache_hashmap_control,
 	&bench_local_storage_tasks_trace,
+	&bench_bpf_hashmap_lookup,
 };
 
 static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
new file mode 100644
index 000000000000..8dbb02f75cff
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <sys/random.h>
+#include <argp.h>
+#include "bench.h"
+#include "bpf_hashmap_lookup.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+	struct bpf_hashmap_lookup *skel;
+} ctx;
+
+/* only available to kernel, so define it here */
+#define BPF_MAX_LOOPS (1<<23)
+
+#define MAX_KEY_SIZE 1024 /* the size of the key map */
+
+static struct {
+	__u32 key_size;
+	__u32 map_flags;
+	__u32 max_entries;
+	__u32 nr_entries;
+	__u32 nr_loops;
+} args = {
+	.key_size = 4,
+	.map_flags = 0,
+	.max_entries = 1000,
+	.nr_entries = 500,
+	.nr_loops = 1000000,
+};
+
+enum {
+	ARG_KEY_SIZE = 8001,
+	ARG_MAP_FLAGS,
+	ARG_MAX_ENTRIES,
+	ARG_NR_ENTRIES,
+	ARG_NR_LOOPS,
+};
+
+static const struct argp_option opts[] = {
+	{ "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0,
+	  "The hashmap key size (max 1024)"},
+	{ "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0,
+	  "The hashmap flags passed to BPF_MAP_CREATE"},
+	{ "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0,
+	  "The hashmap max entries"},
+	{ "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+	  "The number of entries to insert/lookup"},
+	{ "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0,
+	  "The number of loops for the benchmark"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	long ret;
+
+	switch (key) {
+	case ARG_KEY_SIZE:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > MAX_KEY_SIZE) {
+			fprintf(stderr, "invalid key_size");
+			argp_usage(state);
+		}
+		args.key_size = ret;
+		break;
+	case ARG_MAP_FLAGS:
+		ret = strtol(arg, NULL, 0);
+		if (ret < 0 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid map_flags");
+			argp_usage(state);
+		}
+		args.map_flags = ret;
+		break;
+	case ARG_MAX_ENTRIES:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid max_entries");
+			argp_usage(state);
+		}
+		args.max_entries = ret;
+		break;
+	case ARG_NR_ENTRIES:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid nr_entries");
+			argp_usage(state);
+		}
+		args.nr_entries = ret;
+		break;
+	case ARG_NR_LOOPS:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > BPF_MAX_LOOPS) {
+			fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n",
+				ret, BPF_MAX_LOOPS);
+			argp_usage(state);
+		}
+		args.nr_loops = ret;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_hashmap_lookup_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+static void validate(void)
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+
+	if (args.nr_entries > args.max_entries) {
+		fprintf(stderr, "args.nr_entries is too big! (max %u, got %u)\n",
+			args.max_entries, args.nr_entries);
+		exit(1);
+	}
+}
+
+static void *producer(void *input)
+{
+	while (true) {
+		/* trigger the bpf program */
+		syscall(__NR_getpgid);
+	}
+	return NULL;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static inline void patch_key(u32 i, u32 *key)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	*key = i + 1;
+#else
+	*key = __builtin_bswap32(i + 1);
+#endif
+	/* the rest of key is random */
+}
+
+static void setup(void)
+{
+	struct bpf_link *link;
+	int map_fd;
+	int ret;
+	int i;
+
+	setup_libbpf();
+
+	ctx.skel = bpf_hashmap_lookup__open();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries);
+	bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size);
+	bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8);
+	bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags);
+
+	ctx.skel->bss->nr_entries = args.nr_entries;
+	ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries;
+
+	if (args.key_size > 4) {
+		for (i = 1; i < args.key_size/4; i++)
+			ctx.skel->bss->key[i] = 2654435761 * i;
+	}
+
+	ret = bpf_hashmap_lookup__load(ctx.skel);
+	if (ret) {
+		bpf_hashmap_lookup__destroy(ctx.skel);
+		fprintf(stderr, "failed to load map: %s", strerror(-ret));
+		exit(1);
+	}
+
+	/* fill in the hash_map */
+	map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+	for (u64 i = 0; i < args.nr_entries; i++) {
+		patch_key(i, ctx.skel->bss->key);
+		bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY);
+	}
+
+	link = bpf_program__attach(ctx.skel->progs.benchmark);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static inline double events_from_time(u64 time)
+{
+	if (time)
+		return args.nr_loops * 1000000000llu / time / 1000000.0L;
+
+	return 0;
+}
+
+static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 *mean_time)
+{
+	int i, n = 0;
+
+	*events_mean = 0;
+	*events_stddev = 0;
+	*mean_time = 0;
+
+	for (i = 0; i < 32; i++) {
+		if (!times[i])
+			break;
+		*mean_time += times[i];
+		*events_mean += events_from_time(times[i]);
+		n += 1;
+	}
+	if (!n)
+		return 0;
+
+	*mean_time /= n;
+	*events_mean /= n;
+
+	if (n > 1) {
+		for (i = 0; i < n; i++) {
+			double events_i = *events_mean - events_from_time(times[i]);
+			*events_stddev += events_i * events_i / (n - 1);
+		}
+		*events_stddev = sqrt(*events_stddev);
+	}
+
+	return n;
+}
+
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	double events_mean, events_stddev;
+	u64 mean_time;
+	int i, n;
+
+	for (i = 0; i < nr_cpus; i++) {
+		n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean,
+				   &events_stddev, &mean_time);
+		if (n == 0)
+			continue;
+
+		if (env.quiet) {
+			/* we expect only one cpu to be present */
+			if (env.affinity)
+				printf("%.3lf\n", events_mean);
+			else
+				printf("cpu%02d %.3lf\n", i, events_mean);
+		} else {
+			printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec"
+			       " (approximated from %d samples of ~%lums)\n",
+			       i, events_mean, 2*events_stddev,
+			       n, mean_time / 1000000);
+		}
+	}
+}
+
+const struct bench bench_bpf_hashmap_lookup = {
+	.name = "bpf-hashmap-lookup",
+	.argp = &bench_hashmap_lookup_argp,
+	.validate = validate,
+	.setup = setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = NULL,
+	.report_final = hashmap_report_final,
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
new file mode 100644
index 000000000000..1eb74ddca414
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+} hash_map_bench SEC(".maps");
+
+/* The number of slots to store times */
+#define NR_SLOTS 32
+#define NR_CPUS 256
+#define CPU_MASK (NR_CPUS-1)
+
+/* Configured by userspace */
+u64 nr_entries;
+u64 nr_loops;
+u32 __attribute__((__aligned__(8))) key[NR_CPUS];
+
+/* Filled by us */
+u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS];
+u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS];
+
+static inline void patch_key(u32 i)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	key[0] = i + 1;
+#else
+	key[0] = __builtin_bswap32(i + 1);
+#endif
+	/* the rest of key is random and is configured by userspace */
+}
+
+static int lookup_callback(__u32 index, u32 *unused)
+{
+	patch_key(index);
+	return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1;
+}
+
+static int loop_lookup_callback(__u32 index, u32 *unused)
+{
+	return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+	u32 cpu = bpf_get_smp_processor_id();
+	u32 times_index;
+	u64 start_time;
+
+	times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS;
+	start_time = bpf_ktime_get_ns();
+	bpf_loop(nr_loops, loop_lookup_callback, NULL, 0);
+	percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time;
+	percpu_times_index[cpu & CPU_MASK] += 1;
+	return 0;
+}
-- 
cgit v1.2.3


From 6c20822fada1b8adb77fa450d03a0d449686a4a9 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Wed, 15 Feb 2023 19:54:40 +0100
Subject: bpf, test_run: fix &xdp_frame misplacement for LIVE_FRAMES
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

&xdp_buff and &xdp_frame are bound in a way that

xdp_buff->data_hard_start == xdp_frame

It's always the case and e.g. xdp_convert_buff_to_frame() relies on
this.
IOW, the following:

	for (u32 i = 0; i < 0xdead; i++) {
		xdpf = xdp_convert_buff_to_frame(&xdp);
		xdp_convert_frame_to_buff(xdpf, &xdp);
	}

shouldn't ever modify @xdpf's contents or the pointer itself.
However, "live packet" code wrongly treats &xdp_frame as part of its
context placed *before* the data_hard_start. With such flow,
data_hard_start is sizeof(*xdpf) off to the right and no longer points
to the XDP frame.

Instead of replacing `sizeof(ctx)` with `offsetof(ctx, xdpf)` in several
places and praying that there are no more miscalcs left somewhere in the
code, unionize ::frm with ::data in a flex array, so that both starts
pointing to the actual data_hard_start and the XDP frame actually starts
being a part of it, i.e. a part of the headroom, not the context.
A nice side effect is that the maximum frame size for this mode gets
increased by 40 bytes, as xdp_buff::frame_sz includes everything from
data_hard_start (-> includes xdpf already) to the end of XDP/skb shared
info.
Also update %MAX_PKT_SIZE accordingly in the selftests code. Leave it
hardcoded for 64 bit && 4k pages, it can be made more flexible later on.

Minor: align `&head->data` with how `head->frm` is assigned for
consistency.
Minor #2: rename 'frm' to 'frame' in &xdp_page_head while at it for
clarity.

(was found while testing XDP traffic generator on ice, which calls
 xdp_convert_frame_to_buff() for each XDP frame)

Fixes: b530e9e1063e ("bpf: Add "live packet" mode for XDP in BPF_PROG_RUN")
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20230215185440.4126672-1-aleksander.lobakin@intel.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 net/bpf/test_run.c                                 | 29 +++++++++++++++++-----
 .../selftests/bpf/prog_tests/xdp_do_redirect.c     |  7 +++---
 2 files changed, 27 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index b766a84c8536..1ab396a2b87f 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -97,8 +97,11 @@ reset:
 struct xdp_page_head {
 	struct xdp_buff orig_ctx;
 	struct xdp_buff ctx;
-	struct xdp_frame frm;
-	u8 data[];
+	union {
+		/* ::data_hard_start starts here */
+		DECLARE_FLEX_ARRAY(struct xdp_frame, frame);
+		DECLARE_FLEX_ARRAY(u8, data);
+	};
 };
 
 struct xdp_test_data {
@@ -116,6 +119,20 @@ struct xdp_test_data {
 #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
 #define TEST_XDP_MAX_BATCH 256
 
+#if BITS_PER_LONG == 64 && PAGE_SIZE == SZ_4K
+/* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE
+ * must be updated accordingly when any of these changes, otherwise BPF
+ * selftests will fail.
+ */
+#ifdef __s390x__
+#define TEST_MAX_PKT_SIZE 3216
+#else
+#define TEST_MAX_PKT_SIZE 3408
+#endif
+static_assert(SKB_WITH_OVERHEAD(TEST_XDP_FRAME_SIZE - XDP_PACKET_HEADROOM) ==
+	      TEST_MAX_PKT_SIZE);
+#endif
+
 static void xdp_test_run_init_page(struct page *page, void *arg)
 {
 	struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
@@ -132,8 +149,8 @@ static void xdp_test_run_init_page(struct page *page, void *arg)
 	headroom -= meta_len;
 
 	new_ctx = &head->ctx;
-	frm = &head->frm;
-	data = &head->data;
+	frm = head->frame;
+	data = head->data;
 	memcpy(data + headroom, orig_ctx->data_meta, frm_len);
 
 	xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq);
@@ -223,7 +240,7 @@ static void reset_ctx(struct xdp_page_head *head)
 	head->ctx.data = head->orig_ctx.data;
 	head->ctx.data_meta = head->orig_ctx.data_meta;
 	head->ctx.data_end = head->orig_ctx.data_end;
-	xdp_update_frame_from_buff(&head->ctx, &head->frm);
+	xdp_update_frame_from_buff(&head->ctx, head->frame);
 }
 
 static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
@@ -285,7 +302,7 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 		head = phys_to_virt(page_to_phys(page));
 		reset_ctx(head);
 		ctx = &head->ctx;
-		frm = &head->frm;
+		frm = head->frame;
 		xdp->frame_cnt++;
 
 		act = bpf_prog_run_xdp(prog, ctx);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 2666c84dbd01..7271a18ab3e2 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -65,12 +65,13 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
 }
 
 /* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
- * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes
+ * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM =
+ * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one.
  */
 #if defined(__s390x__)
-#define MAX_PKT_SIZE 3176
+#define MAX_PKT_SIZE 3216
 #else
-#define MAX_PKT_SIZE 3368
+#define MAX_PKT_SIZE 3408
 #endif
 static void test_max_pkt_size(int fd)
 {
-- 
cgit v1.2.3


From f58531716ced8975a4ade108ef4af35f98722af7 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Tue, 14 Feb 2023 10:52:37 +0100
Subject: selftests: forwarding: tc_actions: cleanup temporary files when test
 is aborted

remove temporary files created by 'mirred_egress_to_ingress_tcp' test
in the cleanup() handler. Also, change variable names to avoid clashing
with globals from lib.sh.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/r/091649045a017fc00095ecbb75884e5681f7025f.1676368027.git.dcaratti@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/tc_actions.sh | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 919c0dd9fe4b..a96cff8e7219 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -201,10 +201,10 @@ mirred_egress_to_ingress_test()
 
 mirred_egress_to_ingress_tcp_test()
 {
-	local tmpfile=$(mktemp) tmpfile1=$(mktemp)
+	mirred_e2i_tf1=$(mktemp) mirred_e2i_tf2=$(mktemp)
 
 	RET=0
-	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile
+	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$mirred_e2i_tf1
 	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
 		$tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
 			action ct commit nat src addr 192.0.2.2 pipe \
@@ -220,11 +220,11 @@ mirred_egress_to_ingress_tcp_test()
 		ip_proto icmp \
 			action drop
 
-	ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1  &
+	ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2  &
 	local rpid=$!
-	ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile
+	ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
 	wait -n $rpid
-	cmp -s $tmpfile $tmpfile1
+	cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
 	check_err $? "server output check failed"
 
 	$MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
@@ -241,7 +241,7 @@ mirred_egress_to_ingress_tcp_test()
 	tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
 	tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
 
-	rm -f $tmpfile $tmpfile1
+	rm -f $mirred_e2i_tf1 $mirred_e2i_tf2
 	log_test "mirred_egress_to_ingress_tcp ($tcflags)"
 }
 
@@ -270,6 +270,8 @@ setup_prepare()
 
 cleanup()
 {
+	local tf
+
 	pre_cleanup
 
 	switch_destroy
@@ -280,6 +282,8 @@ cleanup()
 
 	ip link set $swp2 address $swp2origmac
 	ip link set $swp1 address $swp1origmac
+
+	for tf in $mirred_e2i_tf1 $mirred_e2i_tf2; do rm -f $tf; done
 }
 
 mirred_egress_redirect_test()
-- 
cgit v1.2.3


From 051d442098421c28c7951625652f61b1e15c4bd5 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 14 Feb 2023 08:49:11 -0500
Subject: net/sched: Retire CBQ qdisc

While this amazing qdisc has served us well over the years it has not been
getting any tender love and care and has bitrotted over time.
It has become mostly a shooting target for syzkaller lately.
For this reason, we are retiring it. Goodbye CBQ - we loved you.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/Kconfig                                  |   17 -
 net/sched/Makefile                                 |    1 -
 net/sched/sch_cbq.c                                | 1727 --------------------
 .../selftests/tc-testing/tc-tests/qdiscs/cbq.json  |  184 ---
 4 files changed, 1929 deletions(-)
 delete mode 100644 net/sched/sch_cbq.c
 delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json

(limited to 'tools/testing')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4f7b52f5a11c..960e30b6b746 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -45,23 +45,6 @@ if NET_SCHED
 
 comment "Queueing/Scheduling"
 
-config NET_SCH_CBQ
-	tristate "Class Based Queueing (CBQ)"
-	help
-	  Say Y here if you want to use the Class-Based Queueing (CBQ) packet
-	  scheduling algorithm. This algorithm classifies the waiting packets
-	  into a tree-like hierarchy of classes; the leaves of this tree are
-	  in turn scheduled by separate algorithms.
-
-	  See the top of <file:net/sched/sch_cbq.c> for more details.
-
-	  CBQ is a commonly used scheduler, so if you're unsure, you should
-	  say Y here. Then say Y to all the queueing algorithms below that you
-	  want to use as leaf disciplines.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called sch_cbq.
-
 config NET_SCH_HTB
 	tristate "Hierarchical Token Bucket (HTB)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7911eec09837..9cc7d74b9c4a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -33,7 +33,6 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
 obj-$(CONFIG_NET_ACT_CT)	+= act_ct.o
 obj-$(CONFIG_NET_ACT_GATE)	+= act_gate.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
-obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
 obj-$(CONFIG_NET_SCH_HFSC)	+= sch_hfsc.o
 obj-$(CONFIG_NET_SCH_RED)	+= sch_red.o
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
deleted file mode 100644
index 36db5f6782f2..000000000000
--- a/net/sched/sch_cbq.c
+++ /dev/null
@@ -1,1727 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/sch_cbq.c	Class-Based Queueing discipline.
- *
- * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-
-/*	Class-Based Queueing (CBQ) algorithm.
-	=======================================
-
-	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
-		 Management Models for Packet Networks",
-		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
-
-		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
-
-		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
-		 Parameters", 1996
-
-		 [4] Sally Floyd and Michael Speer, "Experimental Results
-		 for Class-Based Queueing", 1998, not published.
-
-	-----------------------------------------------------------------------
-
-	Algorithm skeleton was taken from NS simulator cbq.cc.
-	If someone wants to check this code against the LBL version,
-	he should take into account that ONLY the skeleton was borrowed,
-	the implementation is different. Particularly:
-
-	--- The WRR algorithm is different. Our version looks more
-	reasonable (I hope) and works when quanta are allowed to be
-	less than MTU, which is always the case when real time classes
-	have small rates. Note, that the statement of [3] is
-	incomplete, delay may actually be estimated even if class
-	per-round allotment is less than MTU. Namely, if per-round
-	allotment is W*r_i, and r_1+...+r_k = r < 1
-
-	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
-
-	In the worst case we have IntServ estimate with D = W*r+k*MTU
-	and C = MTU*r. The proof (if correct at all) is trivial.
-
-
-	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
-	interpret some places, which look like wrong translations
-	from NS. Anyone is advised to find these differences
-	and explain to me, why I am wrong 8).
-
-	--- Linux has no EOI event, so that we cannot estimate true class
-	idle time. Workaround is to consider the next dequeue event
-	as sign that previous packet is finished. This is wrong because of
-	internal device queueing, but on a permanently loaded link it is true.
-	Moreover, combined with clock integrator, this scheme looks
-	very close to an ideal solution.  */
-
-struct cbq_sched_data;
-
-
-struct cbq_class {
-	struct Qdisc_class_common common;
-	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
-
-/* Parameters */
-	unsigned char		priority;	/* class priority */
-	unsigned char		priority2;	/* priority to be used after overlimit */
-	unsigned char		ewma_log;	/* time constant for idle time calculation */
-
-	u32			defmap;
-
-	/* Link-sharing scheduler parameters */
-	long			maxidle;	/* Class parameters: see below. */
-	long			offtime;
-	long			minidle;
-	u32			avpkt;
-	struct qdisc_rate_table	*R_tab;
-
-	/* General scheduler (WRR) parameters */
-	long			allot;
-	long			quantum;	/* Allotment per WRR round */
-	long			weight;		/* Relative allotment: see below */
-
-	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
-	struct cbq_class	*split;		/* Ptr to split node */
-	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
-	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
-	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
-						   parent otherwise */
-	struct cbq_class	*sibling;	/* Sibling chain */
-	struct cbq_class	*children;	/* Pointer to children chain */
-
-	struct Qdisc		*q;		/* Elementary queueing discipline */
-
-
-/* Variables */
-	unsigned char		cpriority;	/* Effective priority */
-	unsigned char		delayed;
-	unsigned char		level;		/* level of the class in hierarchy:
-						   0 for leaf classes, and maximal
-						   level of children + 1 for nodes.
-						 */
-
-	psched_time_t		last;		/* Last end of service */
-	psched_time_t		undertime;
-	long			avgidle;
-	long			deficit;	/* Saved deficit for WRR */
-	psched_time_t		penalized;
-	struct gnet_stats_basic_sync bstats;
-	struct gnet_stats_queue qstats;
-	struct net_rate_estimator __rcu *rate_est;
-	struct tc_cbq_xstats	xstats;
-
-	struct tcf_proto __rcu	*filter_list;
-	struct tcf_block	*block;
-
-	int			filters;
-
-	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
-};
-
-struct cbq_sched_data {
-	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
-	int			nclasses[TC_CBQ_MAXPRIO + 1];
-	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];
-
-	struct cbq_class	link;
-
-	unsigned int		activemask;
-	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
-								   with backlog */
-
-#ifdef CONFIG_NET_CLS_ACT
-	struct cbq_class	*rx_class;
-#endif
-	struct cbq_class	*tx_class;
-	struct cbq_class	*tx_borrowed;
-	int			tx_len;
-	psched_time_t		now;		/* Cached timestamp */
-	unsigned int		pmask;
-
-	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
-						   started when CBQ has
-						   backlog, but cannot
-						   transmit just now */
-	psched_tdiff_t		wd_expires;
-	int			toplevel;
-	u32			hgenerator;
-};
-
-
-#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
-
-static inline struct cbq_class *
-cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
-{
-	struct Qdisc_class_common *clc;
-
-	clc = qdisc_class_find(&q->clhash, classid);
-	if (clc == NULL)
-		return NULL;
-	return container_of(clc, struct cbq_class, common);
-}
-
-#ifdef CONFIG_NET_CLS_ACT
-
-static struct cbq_class *
-cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
-{
-	struct cbq_class *cl;
-
-	for (cl = this->tparent; cl; cl = cl->tparent) {
-		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
-
-		if (new != NULL && new != this)
-			return new;
-	}
-	return NULL;
-}
-
-#endif
-
-/* Classify packet. The procedure is pretty complicated, but
- * it allows us to combine link sharing and priority scheduling
- * transparently.
- *
- * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
- * so that it resolves to split nodes. Then packets are classified
- * by logical priority, or a more specific classifier may be attached
- * to the split node.
- */
-
-static struct cbq_class *
-cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *head = &q->link;
-	struct cbq_class **defmap;
-	struct cbq_class *cl = NULL;
-	u32 prio = skb->priority;
-	struct tcf_proto *fl;
-	struct tcf_result res;
-
-	/*
-	 *  Step 1. If skb->priority points to one of our classes, use it.
-	 */
-	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
-	    (cl = cbq_class_lookup(q, prio)) != NULL)
-		return cl;
-
-	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	for (;;) {
-		int result = 0;
-		defmap = head->defaults;
-
-		fl = rcu_dereference_bh(head->filter_list);
-		/*
-		 * Step 2+n. Apply classifier.
-		 */
-		result = tcf_classify(skb, NULL, fl, &res, true);
-		if (!fl || result < 0)
-			goto fallback;
-		if (result == TC_ACT_SHOT)
-			return NULL;
-
-		cl = (void *)res.class;
-		if (!cl) {
-			if (TC_H_MAJ(res.classid))
-				cl = cbq_class_lookup(q, res.classid);
-			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
-				cl = defmap[TC_PRIO_BESTEFFORT];
-
-			if (cl == NULL)
-				goto fallback;
-		}
-		if (cl->level >= head->level)
-			goto fallback;
-#ifdef CONFIG_NET_CLS_ACT
-		switch (result) {
-		case TC_ACT_QUEUED:
-		case TC_ACT_STOLEN:
-		case TC_ACT_TRAP:
-			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
-			fallthrough;
-		case TC_ACT_RECLASSIFY:
-			return cbq_reclassify(skb, cl);
-		}
-#endif
-		if (cl->level == 0)
-			return cl;
-
-		/*
-		 * Step 3+n. If classifier selected a link sharing class,
-		 *	   apply agency specific classifier.
-		 *	   Repeat this procedure until we hit a leaf node.
-		 */
-		head = cl;
-	}
-
-fallback:
-	cl = head;
-
-	/*
-	 * Step 4. No success...
-	 */
-	if (TC_H_MAJ(prio) == 0 &&
-	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
-	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
-		return head;
-
-	return cl;
-}
-
-/*
- * A packet has just been enqueued on the empty class.
- * cbq_activate_class adds it to the tail of active class list
- * of its priority band.
- */
-
-static inline void cbq_activate_class(struct cbq_class *cl)
-{
-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	int prio = cl->cpriority;
-	struct cbq_class *cl_tail;
-
-	cl_tail = q->active[prio];
-	q->active[prio] = cl;
-
-	if (cl_tail != NULL) {
-		cl->next_alive = cl_tail->next_alive;
-		cl_tail->next_alive = cl;
-	} else {
-		cl->next_alive = cl;
-		q->activemask |= (1<<prio);
-	}
-}
-
-/*
- * Unlink class from active chain.
- * Note that this same procedure is done directly in cbq_dequeue*
- * during round-robin procedure.
- */
-
-static void cbq_deactivate_class(struct cbq_class *this)
-{
-	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
-	int prio = this->cpriority;
-	struct cbq_class *cl;
-	struct cbq_class *cl_prev = q->active[prio];
-
-	do {
-		cl = cl_prev->next_alive;
-		if (cl == this) {
-			cl_prev->next_alive = cl->next_alive;
-			cl->next_alive = NULL;
-
-			if (cl == q->active[prio]) {
-				q->active[prio] = cl_prev;
-				if (cl == q->active[prio]) {
-					q->active[prio] = NULL;
-					q->activemask &= ~(1<<prio);
-					return;
-				}
-			}
-			return;
-		}
-	} while ((cl_prev = cl) != q->active[prio]);
-}
-
-static void
-cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
-{
-	int toplevel = q->toplevel;
-
-	if (toplevel > cl->level) {
-		psched_time_t now = psched_get_time();
-
-		do {
-			if (cl->undertime < now) {
-				q->toplevel = cl->level;
-				return;
-			}
-		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
-	}
-}
-
-static int
-cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-	    struct sk_buff **to_free)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	int ret;
-	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
-
-#ifdef CONFIG_NET_CLS_ACT
-	q->rx_class = cl;
-#endif
-	if (cl == NULL) {
-		if (ret & __NET_XMIT_BYPASS)
-			qdisc_qstats_drop(sch);
-		__qdisc_drop(skb, to_free);
-		return ret;
-	}
-
-	ret = qdisc_enqueue(skb, cl->q, to_free);
-	if (ret == NET_XMIT_SUCCESS) {
-		sch->q.qlen++;
-		cbq_mark_toplevel(q, cl);
-		if (!cl->next_alive)
-			cbq_activate_class(cl);
-		return ret;
-	}
-
-	if (net_xmit_drop_count(ret)) {
-		qdisc_qstats_drop(sch);
-		cbq_mark_toplevel(q, cl);
-		cl->qstats.drops++;
-	}
-	return ret;
-}
-
-/* Overlimit action: penalize leaf class by adding offtime */
-static void cbq_overlimit(struct cbq_class *cl)
-{
-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	psched_tdiff_t delay = cl->undertime - q->now;
-
-	if (!cl->delayed) {
-		delay += cl->offtime;
-
-		/*
-		 * Class goes to sleep, so that it will have no
-		 * chance to work avgidle. Let's forgive it 8)
-		 *
-		 * BTW cbq-2.0 has a crap in this
-		 * place, apparently they forgot to shift it by cl->ewma_log.
-		 */
-		if (cl->avgidle < 0)
-			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
-		if (cl->avgidle < cl->minidle)
-			cl->avgidle = cl->minidle;
-		if (delay <= 0)
-			delay = 1;
-		cl->undertime = q->now + delay;
-
-		cl->xstats.overactions++;
-		cl->delayed = 1;
-	}
-	if (q->wd_expires == 0 || q->wd_expires > delay)
-		q->wd_expires = delay;
-
-	/* Dirty work! We must schedule wakeups based on
-	 * real available rate, rather than leaf rate,
-	 * which may be tiny (even zero).
-	 */
-	if (q->toplevel == TC_CBQ_MAXLEVEL) {
-		struct cbq_class *b;
-		psched_tdiff_t base_delay = q->wd_expires;
-
-		for (b = cl->borrow; b; b = b->borrow) {
-			delay = b->undertime - q->now;
-			if (delay < base_delay) {
-				if (delay <= 0)
-					delay = 1;
-				base_delay = delay;
-			}
-		}
-
-		q->wd_expires = base_delay;
-	}
-}
-
-/*
- * It is mission critical procedure.
- *
- * We "regenerate" toplevel cutoff, if transmitting class
- * has backlog and it is not regulated. It is not part of
- * original CBQ description, but looks more reasonable.
- * Probably, it is wrong. This question needs further investigation.
- */
-
-static inline void
-cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
-		    struct cbq_class *borrowed)
-{
-	if (cl && q->toplevel >= borrowed->level) {
-		if (cl->q->q.qlen > 1) {
-			do {
-				if (borrowed->undertime == PSCHED_PASTPERFECT) {
-					q->toplevel = borrowed->level;
-					return;
-				}
-			} while ((borrowed = borrowed->borrow) != NULL);
-		}
-#if 0
-	/* It is not necessary now. Uncommenting it
-	   will save CPU cycles, but decrease fairness.
-	 */
-		q->toplevel = TC_CBQ_MAXLEVEL;
-#endif
-	}
-}
-
-static void
-cbq_update(struct cbq_sched_data *q)
-{
-	struct cbq_class *this = q->tx_class;
-	struct cbq_class *cl = this;
-	int len = q->tx_len;
-	psched_time_t now;
-
-	q->tx_class = NULL;
-	/* Time integrator. We calculate EOS time
-	 * by adding expected packet transmission time.
-	 */
-	now = q->now + L2T(&q->link, len);
-
-	for ( ; cl; cl = cl->share) {
-		long avgidle = cl->avgidle;
-		long idle;
-
-		_bstats_update(&cl->bstats, len, 1);
-
-		/*
-		 * (now - last) is total time between packet right edges.
-		 * (last_pktlen/rate) is "virtual" busy time, so that
-		 *
-		 *	idle = (now - last) - last_pktlen/rate
-		 */
-
-		idle = now - cl->last;
-		if ((unsigned long)idle > 128*1024*1024) {
-			avgidle = cl->maxidle;
-		} else {
-			idle -= L2T(cl, len);
-
-		/* true_avgidle := (1-W)*true_avgidle + W*idle,
-		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
-		 * cl->avgidle == true_avgidle/W,
-		 * hence:
-		 */
-			avgidle += idle - (avgidle>>cl->ewma_log);
-		}
-
-		if (avgidle <= 0) {
-			/* Overlimit or at-limit */
-
-			if (avgidle < cl->minidle)
-				avgidle = cl->minidle;
-
-			cl->avgidle = avgidle;
-
-			/* Calculate expected time, when this class
-			 * will be allowed to send.
-			 * It will occur, when:
-			 * (1-W)*true_avgidle + W*delay = 0, i.e.
-			 * idle = (1/W - 1)*(-true_avgidle)
-			 * or
-			 * idle = (1 - W)*(-cl->avgidle);
-			 */
-			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
-
-			/*
-			 * That is not all.
-			 * To maintain the rate allocated to the class,
-			 * we add to undertime virtual clock,
-			 * necessary to complete transmitted packet.
-			 * (len/phys_bandwidth has been already passed
-			 * to the moment of cbq_update)
-			 */
-
-			idle -= L2T(&q->link, len);
-			idle += L2T(cl, len);
-
-			cl->undertime = now + idle;
-		} else {
-			/* Underlimit */
-
-			cl->undertime = PSCHED_PASTPERFECT;
-			if (avgidle > cl->maxidle)
-				cl->avgidle = cl->maxidle;
-			else
-				cl->avgidle = avgidle;
-		}
-		if ((s64)(now - cl->last) > 0)
-			cl->last = now;
-	}
-
-	cbq_update_toplevel(q, this, q->tx_borrowed);
-}
-
-static inline struct cbq_class *
-cbq_under_limit(struct cbq_class *cl)
-{
-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	struct cbq_class *this_cl = cl;
-
-	if (cl->tparent == NULL)
-		return cl;
-
-	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
-		cl->delayed = 0;
-		return cl;
-	}
-
-	do {
-		/* It is very suspicious place. Now overlimit
-		 * action is generated for not bounded classes
-		 * only if link is completely congested.
-		 * Though it is in agree with ancestor-only paradigm,
-		 * it looks very stupid. Particularly,
-		 * it means that this chunk of code will either
-		 * never be called or result in strong amplification
-		 * of burstiness. Dangerous, silly, and, however,
-		 * no another solution exists.
-		 */
-		cl = cl->borrow;
-		if (!cl) {
-			this_cl->qstats.overlimits++;
-			cbq_overlimit(this_cl);
-			return NULL;
-		}
-		if (cl->level > q->toplevel)
-			return NULL;
-	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
-
-	cl->delayed = 0;
-	return cl;
-}
-
-static inline struct sk_buff *
-cbq_dequeue_prio(struct Qdisc *sch, int prio)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl_tail, *cl_prev, *cl;
-	struct sk_buff *skb;
-	int deficit;
-
-	cl_tail = cl_prev = q->active[prio];
-	cl = cl_prev->next_alive;
-
-	do {
-		deficit = 0;
-
-		/* Start round */
-		do {
-			struct cbq_class *borrow = cl;
-
-			if (cl->q->q.qlen &&
-			    (borrow = cbq_under_limit(cl)) == NULL)
-				goto skip_class;
-
-			if (cl->deficit <= 0) {
-				/* Class exhausted its allotment per
-				 * this round. Switch to the next one.
-				 */
-				deficit = 1;
-				cl->deficit += cl->quantum;
-				goto next_class;
-			}
-
-			skb = cl->q->dequeue(cl->q);
-
-			/* Class did not give us any skb :-(
-			 * It could occur even if cl->q->q.qlen != 0
-			 * f.e. if cl->q == "tbf"
-			 */
-			if (skb == NULL)
-				goto skip_class;
-
-			cl->deficit -= qdisc_pkt_len(skb);
-			q->tx_class = cl;
-			q->tx_borrowed = borrow;
-			if (borrow != cl) {
-#ifndef CBQ_XSTATS_BORROWS_BYTES
-				borrow->xstats.borrows++;
-				cl->xstats.borrows++;
-#else
-				borrow->xstats.borrows += qdisc_pkt_len(skb);
-				cl->xstats.borrows += qdisc_pkt_len(skb);
-#endif
-			}
-			q->tx_len = qdisc_pkt_len(skb);
-
-			if (cl->deficit <= 0) {
-				q->active[prio] = cl;
-				cl = cl->next_alive;
-				cl->deficit += cl->quantum;
-			}
-			return skb;
-
-skip_class:
-			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
-				/* Class is empty or penalized.
-				 * Unlink it from active chain.
-				 */
-				cl_prev->next_alive = cl->next_alive;
-				cl->next_alive = NULL;
-
-				/* Did cl_tail point to it? */
-				if (cl == cl_tail) {
-					/* Repair it! */
-					cl_tail = cl_prev;
-
-					/* Was it the last class in this band? */
-					if (cl == cl_tail) {
-						/* Kill the band! */
-						q->active[prio] = NULL;
-						q->activemask &= ~(1<<prio);
-						if (cl->q->q.qlen)
-							cbq_activate_class(cl);
-						return NULL;
-					}
-
-					q->active[prio] = cl_tail;
-				}
-				if (cl->q->q.qlen)
-					cbq_activate_class(cl);
-
-				cl = cl_prev;
-			}
-
-next_class:
-			cl_prev = cl;
-			cl = cl->next_alive;
-		} while (cl_prev != cl_tail);
-	} while (deficit);
-
-	q->active[prio] = cl_prev;
-
-	return NULL;
-}
-
-static inline struct sk_buff *
-cbq_dequeue_1(struct Qdisc *sch)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct sk_buff *skb;
-	unsigned int activemask;
-
-	activemask = q->activemask & 0xFF;
-	while (activemask) {
-		int prio = ffz(~activemask);
-		activemask &= ~(1<<prio);
-		skb = cbq_dequeue_prio(sch, prio);
-		if (skb)
-			return skb;
-	}
-	return NULL;
-}
-
-static struct sk_buff *
-cbq_dequeue(struct Qdisc *sch)
-{
-	struct sk_buff *skb;
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	psched_time_t now;
-
-	now = psched_get_time();
-
-	if (q->tx_class)
-		cbq_update(q);
-
-	q->now = now;
-
-	for (;;) {
-		q->wd_expires = 0;
-
-		skb = cbq_dequeue_1(sch);
-		if (skb) {
-			qdisc_bstats_update(sch, skb);
-			sch->q.qlen--;
-			return skb;
-		}
-
-		/* All the classes are overlimit.
-		 *
-		 * It is possible, if:
-		 *
-		 * 1. Scheduler is empty.
-		 * 2. Toplevel cutoff inhibited borrowing.
-		 * 3. Root class is overlimit.
-		 *
-		 * Reset 2d and 3d conditions and retry.
-		 *
-		 * Note, that NS and cbq-2.0 are buggy, peeking
-		 * an arbitrary class is appropriate for ancestor-only
-		 * sharing, but not for toplevel algorithm.
-		 *
-		 * Our version is better, but slower, because it requires
-		 * two passes, but it is unavoidable with top-level sharing.
-		 */
-
-		if (q->toplevel == TC_CBQ_MAXLEVEL &&
-		    q->link.undertime == PSCHED_PASTPERFECT)
-			break;
-
-		q->toplevel = TC_CBQ_MAXLEVEL;
-		q->link.undertime = PSCHED_PASTPERFECT;
-	}
-
-	/* No packets in scheduler or nobody wants to give them to us :-(
-	 * Sigh... start watchdog timer in the last case.
-	 */
-
-	if (sch->q.qlen) {
-		qdisc_qstats_overlimit(sch);
-		if (q->wd_expires)
-			qdisc_watchdog_schedule(&q->watchdog,
-						now + q->wd_expires);
-	}
-	return NULL;
-}
-
-/* CBQ class maintenance routines */
-
-static void cbq_adjust_levels(struct cbq_class *this)
-{
-	if (this == NULL)
-		return;
-
-	do {
-		int level = 0;
-		struct cbq_class *cl;
-
-		cl = this->children;
-		if (cl) {
-			do {
-				if (cl->level > level)
-					level = cl->level;
-			} while ((cl = cl->sibling) != this->children);
-		}
-		this->level = level + 1;
-	} while ((this = this->tparent) != NULL);
-}
-
-static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
-{
-	struct cbq_class *cl;
-	unsigned int h;
-
-	if (q->quanta[prio] == 0)
-		return;
-
-	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
-			/* BUGGGG... Beware! This expression suffer of
-			 * arithmetic overflows!
-			 */
-			if (cl->priority == prio) {
-				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
-					q->quanta[prio];
-			}
-			if (cl->quantum <= 0 ||
-			    cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
-				pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
-					cl->common.classid, cl->quantum);
-				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
-			}
-		}
-	}
-}
-
-static void cbq_sync_defmap(struct cbq_class *cl)
-{
-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	struct cbq_class *split = cl->split;
-	unsigned int h;
-	int i;
-
-	if (split == NULL)
-		return;
-
-	for (i = 0; i <= TC_PRIO_MAX; i++) {
-		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
-			split->defaults[i] = NULL;
-	}
-
-	for (i = 0; i <= TC_PRIO_MAX; i++) {
-		int level = split->level;
-
-		if (split->defaults[i])
-			continue;
-
-		for (h = 0; h < q->clhash.hashsize; h++) {
-			struct cbq_class *c;
-
-			hlist_for_each_entry(c, &q->clhash.hash[h],
-					     common.hnode) {
-				if (c->split == split && c->level < level &&
-				    c->defmap & (1<<i)) {
-					split->defaults[i] = c;
-					level = c->level;
-				}
-			}
-		}
-	}
-}
-
-static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
-{
-	struct cbq_class *split = NULL;
-
-	if (splitid == 0) {
-		split = cl->split;
-		if (!split)
-			return;
-		splitid = split->common.classid;
-	}
-
-	if (split == NULL || split->common.classid != splitid) {
-		for (split = cl->tparent; split; split = split->tparent)
-			if (split->common.classid == splitid)
-				break;
-	}
-
-	if (split == NULL)
-		return;
-
-	if (cl->split != split) {
-		cl->defmap = 0;
-		cbq_sync_defmap(cl);
-		cl->split = split;
-		cl->defmap = def & mask;
-	} else
-		cl->defmap = (cl->defmap & ~mask) | (def & mask);
-
-	cbq_sync_defmap(cl);
-}
-
-static void cbq_unlink_class(struct cbq_class *this)
-{
-	struct cbq_class *cl, **clp;
-	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
-
-	qdisc_class_hash_remove(&q->clhash, &this->common);
-
-	if (this->tparent) {
-		clp = &this->sibling;
-		cl = *clp;
-		do {
-			if (cl == this) {
-				*clp = cl->sibling;
-				break;
-			}
-			clp = &cl->sibling;
-		} while ((cl = *clp) != this->sibling);
-
-		if (this->tparent->children == this) {
-			this->tparent->children = this->sibling;
-			if (this->sibling == this)
-				this->tparent->children = NULL;
-		}
-	} else {
-		WARN_ON(this->sibling != this);
-	}
-}
-
-static void cbq_link_class(struct cbq_class *this)
-{
-	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
-	struct cbq_class *parent = this->tparent;
-
-	this->sibling = this;
-	qdisc_class_hash_insert(&q->clhash, &this->common);
-
-	if (parent == NULL)
-		return;
-
-	if (parent->children == NULL) {
-		parent->children = this;
-	} else {
-		this->sibling = parent->children->sibling;
-		parent->children->sibling = this;
-	}
-}
-
-static void
-cbq_reset(struct Qdisc *sch)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl;
-	int prio;
-	unsigned int h;
-
-	q->activemask = 0;
-	q->pmask = 0;
-	q->tx_class = NULL;
-	q->tx_borrowed = NULL;
-	qdisc_watchdog_cancel(&q->watchdog);
-	q->toplevel = TC_CBQ_MAXLEVEL;
-	q->now = psched_get_time();
-
-	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
-		q->active[prio] = NULL;
-
-	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
-			qdisc_reset(cl->q);
-
-			cl->next_alive = NULL;
-			cl->undertime = PSCHED_PASTPERFECT;
-			cl->avgidle = cl->maxidle;
-			cl->deficit = cl->quantum;
-			cl->cpriority = cl->priority;
-		}
-	}
-}
-
-
-static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
-{
-	if (lss->change & TCF_CBQ_LSS_FLAGS) {
-		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
-		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
-	}
-	if (lss->change & TCF_CBQ_LSS_EWMA)
-		cl->ewma_log = lss->ewma_log;
-	if (lss->change & TCF_CBQ_LSS_AVPKT)
-		cl->avpkt = lss->avpkt;
-	if (lss->change & TCF_CBQ_LSS_MINIDLE)
-		cl->minidle = -(long)lss->minidle;
-	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
-		cl->maxidle = lss->maxidle;
-		cl->avgidle = lss->maxidle;
-	}
-	if (lss->change & TCF_CBQ_LSS_OFFTIME)
-		cl->offtime = lss->offtime;
-}
-
-static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
-	q->nclasses[cl->priority]--;
-	q->quanta[cl->priority] -= cl->weight;
-	cbq_normalize_quanta(q, cl->priority);
-}
-
-static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
-	q->nclasses[cl->priority]++;
-	q->quanta[cl->priority] += cl->weight;
-	cbq_normalize_quanta(q, cl->priority);
-}
-
-static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
-{
-	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-
-	if (wrr->allot)
-		cl->allot = wrr->allot;
-	if (wrr->weight)
-		cl->weight = wrr->weight;
-	if (wrr->priority) {
-		cl->priority = wrr->priority - 1;
-		cl->cpriority = cl->priority;
-		if (cl->priority >= cl->priority2)
-			cl->priority2 = TC_CBQ_MAXPRIO - 1;
-	}
-
-	cbq_addprio(q, cl);
-	return 0;
-}
-
-static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
-{
-	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
-	return 0;
-}
-
-static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
-	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
-	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
-	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
-	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
-	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
-	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
-	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
-};
-
-static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
-			 struct nlattr *opt,
-			 struct netlink_ext_ack *extack)
-{
-	int err;
-
-	if (!opt) {
-		NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
-		return -EINVAL;
-	}
-
-	err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
-					  cbq_policy, extack);
-	if (err < 0)
-		return err;
-
-	if (tb[TCA_CBQ_WRROPT]) {
-		const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
-
-		if (wrr->priority > TC_CBQ_MAXPRIO) {
-			NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
-			err = -EINVAL;
-		}
-	}
-	return err;
-}
-
-static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
-		    struct netlink_ext_ack *extack)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct nlattr *tb[TCA_CBQ_MAX + 1];
-	struct tc_ratespec *r;
-	int err;
-
-	qdisc_watchdog_init(&q->watchdog, sch);
-
-	err = cbq_opt_parse(tb, opt, extack);
-	if (err < 0)
-		return err;
-
-	if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
-		NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
-		return -EINVAL;
-	}
-
-	r = nla_data(tb[TCA_CBQ_RATE]);
-
-	q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
-	if (!q->link.R_tab)
-		return -EINVAL;
-
-	err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
-	if (err)
-		goto put_rtab;
-
-	err = qdisc_class_hash_init(&q->clhash);
-	if (err < 0)
-		goto put_block;
-
-	q->link.sibling = &q->link;
-	q->link.common.classid = sch->handle;
-	q->link.qdisc = sch;
-	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
-				      sch->handle, NULL);
-	if (!q->link.q)
-		q->link.q = &noop_qdisc;
-	else
-		qdisc_hash_add(q->link.q, true);
-
-	q->link.priority = TC_CBQ_MAXPRIO - 1;
-	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
-	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
-	q->link.allot = psched_mtu(qdisc_dev(sch));
-	q->link.quantum = q->link.allot;
-	q->link.weight = q->link.R_tab->rate.rate;
-
-	q->link.ewma_log = TC_CBQ_DEF_EWMA;
-	q->link.avpkt = q->link.allot/2;
-	q->link.minidle = -0x7FFFFFFF;
-
-	q->toplevel = TC_CBQ_MAXLEVEL;
-	q->now = psched_get_time();
-
-	cbq_link_class(&q->link);
-
-	if (tb[TCA_CBQ_LSSOPT])
-		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
-
-	cbq_addprio(q, &q->link);
-	return 0;
-
-put_block:
-	tcf_block_put(q->link.block);
-
-put_rtab:
-	qdisc_put_rtab(q->link.R_tab);
-	return err;
-}
-
-static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
-{
-	unsigned char *b = skb_tail_pointer(skb);
-
-	if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
-		goto nla_put_failure;
-	return skb->len;
-
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
-{
-	unsigned char *b = skb_tail_pointer(skb);
-	struct tc_cbq_lssopt opt;
-
-	opt.flags = 0;
-	if (cl->borrow == NULL)
-		opt.flags |= TCF_CBQ_LSS_BOUNDED;
-	if (cl->share == NULL)
-		opt.flags |= TCF_CBQ_LSS_ISOLATED;
-	opt.ewma_log = cl->ewma_log;
-	opt.level = cl->level;
-	opt.avpkt = cl->avpkt;
-	opt.maxidle = cl->maxidle;
-	opt.minidle = (u32)(-cl->minidle);
-	opt.offtime = cl->offtime;
-	opt.change = ~0;
-	if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
-		goto nla_put_failure;
-	return skb->len;
-
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
-{
-	unsigned char *b = skb_tail_pointer(skb);
-	struct tc_cbq_wrropt opt;
-
-	memset(&opt, 0, sizeof(opt));
-	opt.flags = 0;
-	opt.allot = cl->allot;
-	opt.priority = cl->priority + 1;
-	opt.cpriority = cl->cpriority + 1;
-	opt.weight = cl->weight;
-	if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
-		goto nla_put_failure;
-	return skb->len;
-
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
-{
-	unsigned char *b = skb_tail_pointer(skb);
-	struct tc_cbq_fopt opt;
-
-	if (cl->split || cl->defmap) {
-		opt.split = cl->split ? cl->split->common.classid : 0;
-		opt.defmap = cl->defmap;
-		opt.defchange = ~0;
-		if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
-			goto nla_put_failure;
-	}
-	return skb->len;
-
-nla_put_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
-{
-	if (cbq_dump_lss(skb, cl) < 0 ||
-	    cbq_dump_rate(skb, cl) < 0 ||
-	    cbq_dump_wrr(skb, cl) < 0 ||
-	    cbq_dump_fopt(skb, cl) < 0)
-		return -1;
-	return 0;
-}
-
-static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct nlattr *nest;
-
-	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
-	if (nest == NULL)
-		goto nla_put_failure;
-	if (cbq_dump_attr(skb, &q->link) < 0)
-		goto nla_put_failure;
-	return nla_nest_end(skb, nest);
-
-nla_put_failure:
-	nla_nest_cancel(skb, nest);
-	return -1;
-}
-
-static int
-cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-
-	q->link.xstats.avgidle = q->link.avgidle;
-	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
-}
-
-static int
-cbq_dump_class(struct Qdisc *sch, unsigned long arg,
-	       struct sk_buff *skb, struct tcmsg *tcm)
-{
-	struct cbq_class *cl = (struct cbq_class *)arg;
-	struct nlattr *nest;
-
-	if (cl->tparent)
-		tcm->tcm_parent = cl->tparent->common.classid;
-	else
-		tcm->tcm_parent = TC_H_ROOT;
-	tcm->tcm_handle = cl->common.classid;
-	tcm->tcm_info = cl->q->handle;
-
-	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
-	if (nest == NULL)
-		goto nla_put_failure;
-	if (cbq_dump_attr(skb, cl) < 0)
-		goto nla_put_failure;
-	return nla_nest_end(skb, nest);
-
-nla_put_failure:
-	nla_nest_cancel(skb, nest);
-	return -1;
-}
-
-static int
-cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
-	struct gnet_dump *d)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class *)arg;
-	__u32 qlen;
-
-	cl->xstats.avgidle = cl->avgidle;
-	cl->xstats.undertime = 0;
-	qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
-
-	if (cl->undertime != PSCHED_PASTPERFECT)
-		cl->xstats.undertime = cl->undertime - q->now;
-
-	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
-	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
-		return -1;
-
-	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
-}
-
-static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
-		     struct Qdisc **old, struct netlink_ext_ack *extack)
-{
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	if (new == NULL) {
-		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
-					cl->common.classid, extack);
-		if (new == NULL)
-			return -ENOBUFS;
-	}
-
-	*old = qdisc_replace(sch, new, &cl->q);
-	return 0;
-}
-
-static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
-{
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	return cl->q;
-}
-
-static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
-{
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	cbq_deactivate_class(cl);
-}
-
-static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-
-	return (unsigned long)cbq_class_lookup(q, classid);
-}
-
-static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-
-	WARN_ON(cl->filters);
-
-	tcf_block_put(cl->block);
-	qdisc_put(cl->q);
-	qdisc_put_rtab(cl->R_tab);
-	gen_kill_estimator(&cl->rate_est);
-	if (cl != &q->link)
-		kfree(cl);
-}
-
-static void cbq_destroy(struct Qdisc *sch)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct hlist_node *next;
-	struct cbq_class *cl;
-	unsigned int h;
-
-#ifdef CONFIG_NET_CLS_ACT
-	q->rx_class = NULL;
-#endif
-	/*
-	 * Filters must be destroyed first because we don't destroy the
-	 * classes from root to leafs which means that filters can still
-	 * be bound to classes which have been destroyed already. --TGR '04
-	 */
-	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
-			tcf_block_put(cl->block);
-			cl->block = NULL;
-		}
-	}
-	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
-					  common.hnode)
-			cbq_destroy_class(sch, cl);
-	}
-	qdisc_class_hash_destroy(&q->clhash);
-}
-
-static int
-cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
-		 unsigned long *arg, struct netlink_ext_ack *extack)
-{
-	int err;
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class *)*arg;
-	struct nlattr *opt = tca[TCA_OPTIONS];
-	struct nlattr *tb[TCA_CBQ_MAX + 1];
-	struct cbq_class *parent;
-	struct qdisc_rate_table *rtab = NULL;
-
-	err = cbq_opt_parse(tb, opt, extack);
-	if (err < 0)
-		return err;
-
-	if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) {
-		NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params");
-		return -EOPNOTSUPP;
-	}
-
-	if (cl) {
-		/* Check parent */
-		if (parentid) {
-			if (cl->tparent &&
-			    cl->tparent->common.classid != parentid) {
-				NL_SET_ERR_MSG(extack, "Invalid parent id");
-				return -EINVAL;
-			}
-			if (!cl->tparent && parentid != TC_H_ROOT) {
-				NL_SET_ERR_MSG(extack, "Parent must be root");
-				return -EINVAL;
-			}
-		}
-
-		if (tb[TCA_CBQ_RATE]) {
-			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
-					      tb[TCA_CBQ_RTAB], extack);
-			if (rtab == NULL)
-				return -EINVAL;
-		}
-
-		if (tca[TCA_RATE]) {
-			err = gen_replace_estimator(&cl->bstats, NULL,
-						    &cl->rate_est,
-						    NULL,
-						    true,
-						    tca[TCA_RATE]);
-			if (err) {
-				NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
-				qdisc_put_rtab(rtab);
-				return err;
-			}
-		}
-
-		/* Change class parameters */
-		sch_tree_lock(sch);
-
-		if (cl->next_alive != NULL)
-			cbq_deactivate_class(cl);
-
-		if (rtab) {
-			qdisc_put_rtab(cl->R_tab);
-			cl->R_tab = rtab;
-		}
-
-		if (tb[TCA_CBQ_LSSOPT])
-			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
-
-		if (tb[TCA_CBQ_WRROPT]) {
-			cbq_rmprio(q, cl);
-			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
-		}
-
-		if (tb[TCA_CBQ_FOPT])
-			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
-
-		if (cl->q->q.qlen)
-			cbq_activate_class(cl);
-
-		sch_tree_unlock(sch);
-
-		return 0;
-	}
-
-	if (parentid == TC_H_ROOT)
-		return -EINVAL;
-
-	if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) {
-		NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing");
-		return -EINVAL;
-	}
-
-	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB],
-			      extack);
-	if (rtab == NULL)
-		return -EINVAL;
-
-	if (classid) {
-		err = -EINVAL;
-		if (TC_H_MAJ(classid ^ sch->handle) ||
-		    cbq_class_lookup(q, classid)) {
-			NL_SET_ERR_MSG(extack, "Specified class not found");
-			goto failure;
-		}
-	} else {
-		int i;
-		classid = TC_H_MAKE(sch->handle, 0x8000);
-
-		for (i = 0; i < 0x8000; i++) {
-			if (++q->hgenerator >= 0x8000)
-				q->hgenerator = 1;
-			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
-				break;
-		}
-		err = -ENOSR;
-		if (i >= 0x8000) {
-			NL_SET_ERR_MSG(extack, "Unable to generate classid");
-			goto failure;
-		}
-		classid = classid|q->hgenerator;
-	}
-
-	parent = &q->link;
-	if (parentid) {
-		parent = cbq_class_lookup(q, parentid);
-		err = -EINVAL;
-		if (!parent) {
-			NL_SET_ERR_MSG(extack, "Failed to find parentid");
-			goto failure;
-		}
-	}
-
-	err = -ENOBUFS;
-	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
-	if (cl == NULL)
-		goto failure;
-
-	gnet_stats_basic_sync_init(&cl->bstats);
-	err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
-	if (err) {
-		kfree(cl);
-		goto failure;
-	}
-
-	if (tca[TCA_RATE]) {
-		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL, true, tca[TCA_RATE]);
-		if (err) {
-			NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
-			tcf_block_put(cl->block);
-			kfree(cl);
-			goto failure;
-		}
-	}
-
-	cl->R_tab = rtab;
-	rtab = NULL;
-	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
-				  NULL);
-	if (!cl->q)
-		cl->q = &noop_qdisc;
-	else
-		qdisc_hash_add(cl->q, true);
-
-	cl->common.classid = classid;
-	cl->tparent = parent;
-	cl->qdisc = sch;
-	cl->allot = parent->allot;
-	cl->quantum = cl->allot;
-	cl->weight = cl->R_tab->rate.rate;
-
-	sch_tree_lock(sch);
-	cbq_link_class(cl);
-	cl->borrow = cl->tparent;
-	if (cl->tparent != &q->link)
-		cl->share = cl->tparent;
-	cbq_adjust_levels(parent);
-	cl->minidle = -0x7FFFFFFF;
-	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
-	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
-	if (cl->ewma_log == 0)
-		cl->ewma_log = q->link.ewma_log;
-	if (cl->maxidle == 0)
-		cl->maxidle = q->link.maxidle;
-	if (cl->avpkt == 0)
-		cl->avpkt = q->link.avpkt;
-	if (tb[TCA_CBQ_FOPT])
-		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
-	sch_tree_unlock(sch);
-
-	qdisc_class_hash_grow(sch, &q->clhash);
-
-	*arg = (unsigned long)cl;
-	return 0;
-
-failure:
-	qdisc_put_rtab(rtab);
-	return err;
-}
-
-static int cbq_delete(struct Qdisc *sch, unsigned long arg,
-		      struct netlink_ext_ack *extack)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	if (cl->filters || cl->children || cl == &q->link)
-		return -EBUSY;
-
-	sch_tree_lock(sch);
-
-	qdisc_purge_queue(cl->q);
-
-	if (cl->next_alive)
-		cbq_deactivate_class(cl);
-
-	if (q->tx_borrowed == cl)
-		q->tx_borrowed = q->tx_class;
-	if (q->tx_class == cl) {
-		q->tx_class = NULL;
-		q->tx_borrowed = NULL;
-	}
-#ifdef CONFIG_NET_CLS_ACT
-	if (q->rx_class == cl)
-		q->rx_class = NULL;
-#endif
-
-	cbq_unlink_class(cl);
-	cbq_adjust_levels(cl->tparent);
-	cl->defmap = 0;
-	cbq_sync_defmap(cl);
-
-	cbq_rmprio(q, cl);
-	sch_tree_unlock(sch);
-
-	cbq_destroy_class(sch, cl);
-	return 0;
-}
-
-static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg,
-				       struct netlink_ext_ack *extack)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	if (cl == NULL)
-		cl = &q->link;
-
-	return cl->block;
-}
-
-static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
-				     u32 classid)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *p = (struct cbq_class *)parent;
-	struct cbq_class *cl = cbq_class_lookup(q, classid);
-
-	if (cl) {
-		if (p && p->level <= cl->level)
-			return 0;
-		cl->filters++;
-		return (unsigned long)cl;
-	}
-	return 0;
-}
-
-static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
-{
-	struct cbq_class *cl = (struct cbq_class *)arg;
-
-	cl->filters--;
-}
-
-static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl;
-	unsigned int h;
-
-	if (arg->stop)
-		return;
-
-	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
-			if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
-				return;
-		}
-	}
-}
-
-static const struct Qdisc_class_ops cbq_class_ops = {
-	.graft		=	cbq_graft,
-	.leaf		=	cbq_leaf,
-	.qlen_notify	=	cbq_qlen_notify,
-	.find		=	cbq_find,
-	.change		=	cbq_change_class,
-	.delete		=	cbq_delete,
-	.walk		=	cbq_walk,
-	.tcf_block	=	cbq_tcf_block,
-	.bind_tcf	=	cbq_bind_filter,
-	.unbind_tcf	=	cbq_unbind_filter,
-	.dump		=	cbq_dump_class,
-	.dump_stats	=	cbq_dump_class_stats,
-};
-
-static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
-	.next		=	NULL,
-	.cl_ops		=	&cbq_class_ops,
-	.id		=	"cbq",
-	.priv_size	=	sizeof(struct cbq_sched_data),
-	.enqueue	=	cbq_enqueue,
-	.dequeue	=	cbq_dequeue,
-	.peek		=	qdisc_peek_dequeued,
-	.init		=	cbq_init,
-	.reset		=	cbq_reset,
-	.destroy	=	cbq_destroy,
-	.change		=	NULL,
-	.dump		=	cbq_dump,
-	.dump_stats	=	cbq_dump_stats,
-	.owner		=	THIS_MODULE,
-};
-
-static int __init cbq_module_init(void)
-{
-	return register_qdisc(&cbq_qdisc_ops);
-}
-static void __exit cbq_module_exit(void)
-{
-	unregister_qdisc(&cbq_qdisc_ops);
-}
-module_init(cbq_module_init)
-module_exit(cbq_module_exit)
-MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json
deleted file mode 100644
index 1ab21c83a122..000000000000
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json
+++ /dev/null
@@ -1,184 +0,0 @@
-[
-    {
-        "id": "3460",
-        "name": "Create CBQ with default setting",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "0592",
-        "name": "Create CBQ with mpu",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "4684",
-        "name": "Create CBQ with valid cell num",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "4345",
-        "name": "Create CBQ with invalid cell num",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100",
-        "expExitCode": "1",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "0",
-        "teardown": [
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "4525",
-        "name": "Create CBQ with valid ewma",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "6784",
-        "name": "Create CBQ with invalid ewma",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128",
-        "expExitCode": "1",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "0",
-        "teardown": [
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "5468",
-        "name": "Delete CBQ with handle",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true",
-            "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000"
-        ],
-        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "0",
-        "teardown": [
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "492a",
-        "name": "Show CBQ class",
-        "category": [
-            "qdisc",
-            "cbq"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000",
-        "expExitCode": "0",
-        "verifyCmd": "$TC class show dev $DUMMY",
-        "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    }
-]
-- 
cgit v1.2.3


From fb38306ceb9e770adfb5ffa6e3c64047b55f7a07 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 14 Feb 2023 08:49:12 -0500
Subject: net/sched: Retire ATM qdisc

The ATM qdisc has served us well over the years but has not been getting much
TLC due to lack of known users. Most recently it has become a shooting target
for syzkaller. For this reason, we are retiring it.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/Kconfig                                  |  14 -
 net/sched/Makefile                                 |   1 -
 net/sched/sch_atm.c                                | 706 ---------------------
 .../selftests/tc-testing/tc-tests/qdiscs/atm.json  |  94 ---
 4 files changed, 815 deletions(-)
 delete mode 100644 net/sched/sch_atm.c
 delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json

(limited to 'tools/testing')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 960e30b6b746..869321db12f7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -68,20 +68,6 @@ config NET_SCH_HFSC
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_hfsc.
 
-config NET_SCH_ATM
-	tristate "ATM Virtual Circuits (ATM)"
-	depends on ATM
-	help
-	  Say Y here if you want to use the ATM pseudo-scheduler.  This
-	  provides a framework for invoking classifiers, which in turn
-	  select classes of this queuing discipline.  Each class maps
-	  the flow(s) it is handling to a given virtual circuit.
-
-	  See the top of <file:net/sched/sch_atm.c> for more details.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called sch_atm.
-
 config NET_SCH_PRIO
 	tristate "Multi Band Priority Queueing (PRIO)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 9cc7d74b9c4a..d2612b47530c 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -45,7 +45,6 @@ obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
 obj-$(CONFIG_NET_SCH_TEQL)	+= sch_teql.o
 obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
 obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
-obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG)	+= sch_plug.o
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
deleted file mode 100644
index 4a981ca90b0b..000000000000
--- a/net/sched/sch_atm.c
+++ /dev/null
@@ -1,706 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/atmdev.h>
-#include <linux/atmclip.h>
-#include <linux/rtnetlink.h>
-#include <linux/file.h>		/* for fput */
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-/*
- * The ATM queuing discipline provides a framework for invoking classifiers
- * (aka "filters"), which in turn select classes of this queuing discipline.
- * Each class maps the flow(s) it is handling to a given VC. Multiple classes
- * may share the same VC.
- *
- * When creating a class, VCs are specified by passing the number of the open
- * socket descriptor by which the calling process references the VC. The kernel
- * keeps the VC open at least until all classes using it are removed.
- *
- * In this file, most functions are named atm_tc_* to avoid confusion with all
- * the atm_* in net/atm. This naming convention differs from what's used in the
- * rest of net/sched.
- *
- * Known bugs:
- *  - sometimes messes up the IP stack
- *  - any manipulations besides the few operations described in the README, are
- *    untested and likely to crash the system
- *  - should lock the flow while there is data in the queue (?)
- */
-
-#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
-
-struct atm_flow_data {
-	struct Qdisc_class_common common;
-	struct Qdisc		*q;	/* FIFO, TBF, etc. */
-	struct tcf_proto __rcu	*filter_list;
-	struct tcf_block	*block;
-	struct atm_vcc		*vcc;	/* VCC; NULL if VCC is closed */
-	void			(*old_pop)(struct atm_vcc *vcc,
-					   struct sk_buff *skb); /* chaining */
-	struct atm_qdisc_data	*parent;	/* parent qdisc */
-	struct socket		*sock;		/* for closing */
-	int			ref;		/* reference count */
-	struct gnet_stats_basic_sync	bstats;
-	struct gnet_stats_queue	qstats;
-	struct list_head	list;
-	struct atm_flow_data	*excess;	/* flow for excess traffic;
-						   NULL to set CLP instead */
-	int			hdr_len;
-	unsigned char		hdr[];		/* header data; MUST BE LAST */
-};
-
-struct atm_qdisc_data {
-	struct atm_flow_data	link;		/* unclassified skbs go here */
-	struct list_head	flows;		/* NB: "link" is also on this
-						   list */
-	struct tasklet_struct	task;		/* dequeue tasklet */
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow;
-
-	list_for_each_entry(flow, &p->flows, list) {
-		if (flow->common.classid == classid)
-			return flow;
-	}
-	return NULL;
-}
-
-static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
-			struct Qdisc *new, struct Qdisc **old,
-			struct netlink_ext_ack *extack)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
-	pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
-		sch, p, flow, new, old);
-	if (list_empty(&flow->list))
-		return -EINVAL;
-	if (!new)
-		new = &noop_qdisc;
-	*old = flow->q;
-	flow->q = new;
-	if (*old)
-		qdisc_reset(*old);
-	return 0;
-}
-
-static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
-{
-	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
-	pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
-	return flow ? flow->q : NULL;
-}
-
-static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
-{
-	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
-	struct atm_flow_data *flow;
-
-	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
-	flow = lookup_flow(sch, classid);
-	pr_debug("%s: flow %p\n", __func__, flow);
-	return (unsigned long)flow;
-}
-
-static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
-					unsigned long parent, u32 classid)
-{
-	struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
-	struct atm_flow_data *flow;
-
-	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
-	flow = lookup_flow(sch, classid);
-	if (flow)
-		flow->ref++;
-	pr_debug("%s: flow %p\n", __func__, flow);
-	return (unsigned long)flow;
-}
-
-/*
- * atm_tc_put handles all destructions, including the ones that are explicitly
- * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
- * anything that still seems to be in use.
- */
-static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
-	pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	if (--flow->ref)
-		return;
-	pr_debug("atm_tc_put: destroying\n");
-	list_del_init(&flow->list);
-	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
-	qdisc_put(flow->q);
-	tcf_block_put(flow->block);
-	if (flow->sock) {
-		pr_debug("atm_tc_put: f_count %ld\n",
-			file_count(flow->sock->file));
-		flow->vcc->pop = flow->old_pop;
-		sockfd_put(flow->sock);
-	}
-	if (flow->excess)
-		atm_tc_put(sch, (unsigned long)flow->excess);
-	if (flow != &p->link)
-		kfree(flow);
-	/*
-	 * If flow == &p->link, the qdisc no longer works at this point and
-	 * needs to be removed. (By the caller of atm_tc_put.)
-	 */
-}
-
-static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
-{
-	struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
-
-	pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
-	VCC2FLOW(vcc)->old_pop(vcc, skb);
-	tasklet_schedule(&p->task);
-}
-
-static const u8 llc_oui_ip[] = {
-	0xaa,			/* DSAP: non-ISO */
-	0xaa,			/* SSAP: non-ISO */
-	0x03,			/* Ctrl: Unnumbered Information Command PDU */
-	0x00,			/* OUI: EtherType */
-	0x00, 0x00,
-	0x08, 0x00
-};				/* Ethertype IP (0800) */
-
-static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
-	[TCA_ATM_FD]		= { .type = NLA_U32 },
-	[TCA_ATM_EXCESS]	= { .type = NLA_U32 },
-};
-
-static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
-			 struct nlattr **tca, unsigned long *arg,
-			 struct netlink_ext_ack *extack)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
-	struct atm_flow_data *excess = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS];
-	struct nlattr *tb[TCA_ATM_MAX + 1];
-	struct socket *sock;
-	int fd, error, hdr_len;
-	void *hdr;
-
-	pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
-		"flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
-	/*
-	 * The concept of parents doesn't apply for this qdisc.
-	 */
-	if (parent && parent != TC_H_ROOT && parent != sch->handle)
-		return -EINVAL;
-	/*
-	 * ATM classes cannot be changed. In order to change properties of the
-	 * ATM connection, that socket needs to be modified directly (via the
-	 * native ATM API. In order to send a flow to a different VC, the old
-	 * class needs to be removed and a new one added. (This may be changed
-	 * later.)
-	 */
-	if (flow)
-		return -EBUSY;
-	if (opt == NULL)
-		return -EINVAL;
-
-	error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy,
-					    NULL);
-	if (error < 0)
-		return error;
-
-	if (!tb[TCA_ATM_FD])
-		return -EINVAL;
-	fd = nla_get_u32(tb[TCA_ATM_FD]);
-	pr_debug("atm_tc_change: fd %d\n", fd);
-	if (tb[TCA_ATM_HDR]) {
-		hdr_len = nla_len(tb[TCA_ATM_HDR]);
-		hdr = nla_data(tb[TCA_ATM_HDR]);
-	} else {
-		hdr_len = RFC1483LLC_LEN;
-		hdr = NULL;	/* default LLC/SNAP for IP */
-	}
-	if (!tb[TCA_ATM_EXCESS])
-		excess = NULL;
-	else {
-		excess = (struct atm_flow_data *)
-			atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
-		if (!excess)
-			return -ENOENT;
-	}
-	pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
-		 opt->nla_type, nla_len(opt), hdr_len);
-	sock = sockfd_lookup(fd, &error);
-	if (!sock)
-		return error;	/* f_count++ */
-	pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
-	if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
-		error = -EPROTOTYPE;
-		goto err_out;
-	}
-	/* @@@ should check if the socket is really operational or we'll crash
-	   on vcc->send */
-	if (classid) {
-		if (TC_H_MAJ(classid ^ sch->handle)) {
-			pr_debug("atm_tc_change: classid mismatch\n");
-			error = -EINVAL;
-			goto err_out;
-		}
-	} else {
-		int i;
-		unsigned long cl;
-
-		for (i = 1; i < 0x8000; i++) {
-			classid = TC_H_MAKE(sch->handle, 0x8000 | i);
-			cl = atm_tc_find(sch, classid);
-			if (!cl)
-				break;
-		}
-	}
-	pr_debug("atm_tc_change: new id %x\n", classid);
-	flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
-	pr_debug("atm_tc_change: flow %p\n", flow);
-	if (!flow) {
-		error = -ENOBUFS;
-		goto err_out;
-	}
-
-	error = tcf_block_get(&flow->block, &flow->filter_list, sch,
-			      extack);
-	if (error) {
-		kfree(flow);
-		goto err_out;
-	}
-
-	flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
-				    extack);
-	if (!flow->q)
-		flow->q = &noop_qdisc;
-	pr_debug("atm_tc_change: qdisc %p\n", flow->q);
-	flow->sock = sock;
-	flow->vcc = ATM_SD(sock);	/* speedup */
-	flow->vcc->user_back = flow;
-	pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
-	flow->old_pop = flow->vcc->pop;
-	flow->parent = p;
-	flow->vcc->pop = sch_atm_pop;
-	flow->common.classid = classid;
-	flow->ref = 1;
-	flow->excess = excess;
-	list_add(&flow->list, &p->link.list);
-	flow->hdr_len = hdr_len;
-	if (hdr)
-		memcpy(flow->hdr, hdr, hdr_len);
-	else
-		memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip));
-	*arg = (unsigned long)flow;
-	return 0;
-err_out:
-	sockfd_put(sock);
-	return error;
-}
-
-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
-			 struct netlink_ext_ack *extack)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
-	pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	if (list_empty(&flow->list))
-		return -EINVAL;
-	if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
-		return -EBUSY;
-	/*
-	 * Reference count must be 2: one for "keepalive" (set at class
-	 * creation), and one for the reference held when calling delete.
-	 */
-	if (flow->ref < 2) {
-		pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
-		return -EINVAL;
-	}
-	if (flow->ref > 2)
-		return -EBUSY;	/* catch references via excess, etc. */
-	atm_tc_put(sch, arg);
-	return 0;
-}
-
-static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow;
-
-	pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
-	if (walker->stop)
-		return;
-	list_for_each_entry(flow, &p->flows, list) {
-		if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker))
-			break;
-	}
-}
-
-static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
-					  struct netlink_ext_ack *extack)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
-	pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	return flow ? flow->block : p->link.block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-			  struct sk_buff **to_free)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow;
-	struct tcf_result res;
-	int result;
-	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-
-	pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-	result = TC_ACT_OK;	/* be nice to gcc */
-	flow = NULL;
-	if (TC_H_MAJ(skb->priority) != sch->handle ||
-	    !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
-		struct tcf_proto *fl;
-
-		list_for_each_entry(flow, &p->flows, list) {
-			fl = rcu_dereference_bh(flow->filter_list);
-			if (fl) {
-				result = tcf_classify(skb, NULL, fl, &res, true);
-				if (result < 0)
-					continue;
-				if (result == TC_ACT_SHOT)
-					goto done;
-
-				flow = (struct atm_flow_data *)res.class;
-				if (!flow)
-					flow = lookup_flow(sch, res.classid);
-				goto drop;
-			}
-		}
-		flow = NULL;
-done:
-		;
-	}
-	if (!flow) {
-		flow = &p->link;
-	} else {
-		if (flow->vcc)
-			ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
-		/*@@@ looks good ... but it's not supposed to work :-) */
-#ifdef CONFIG_NET_CLS_ACT
-		switch (result) {
-		case TC_ACT_QUEUED:
-		case TC_ACT_STOLEN:
-		case TC_ACT_TRAP:
-			__qdisc_drop(skb, to_free);
-			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
-		case TC_ACT_SHOT:
-			__qdisc_drop(skb, to_free);
-			goto drop;
-		case TC_ACT_RECLASSIFY:
-			if (flow->excess)
-				flow = flow->excess;
-			else
-				ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP;
-			break;
-		}
-#endif
-	}
-
-	ret = qdisc_enqueue(skb, flow->q, to_free);
-	if (ret != NET_XMIT_SUCCESS) {
-drop: __maybe_unused
-		if (net_xmit_drop_count(ret)) {
-			qdisc_qstats_drop(sch);
-			if (flow)
-				flow->qstats.drops++;
-		}
-		return ret;
-	}
-	/*
-	 * Okay, this may seem weird. We pretend we've dropped the packet if
-	 * it goes via ATM. The reason for this is that the outer qdisc
-	 * expects to be able to q->dequeue the packet later on if we return
-	 * success at this place. Also, sch->q.qdisc needs to reflect whether
-	 * there is a packet egligible for dequeuing or not. Note that the
-	 * statistics of the outer qdisc are necessarily wrong because of all
-	 * this. There's currently no correct solution for this.
-	 */
-	if (flow == &p->link) {
-		sch->q.qlen++;
-		return NET_XMIT_SUCCESS;
-	}
-	tasklet_schedule(&p->task);
-	return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
-
-/*
- * Dequeue packets and send them over ATM. Note that we quite deliberately
- * avoid checking net_device's flow control here, simply because sch_atm
- * uses its own channels, which have nothing to do with any CLIP/LANE/or
- * non-ATM interfaces.
- */
-
-static void sch_atm_dequeue(struct tasklet_struct *t)
-{
-	struct atm_qdisc_data *p = from_tasklet(p, t, task);
-	struct Qdisc *sch = qdisc_from_priv(p);
-	struct atm_flow_data *flow;
-	struct sk_buff *skb;
-
-	pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
-	list_for_each_entry(flow, &p->flows, list) {
-		if (flow == &p->link)
-			continue;
-		/*
-		 * If traffic is properly shaped, this won't generate nasty
-		 * little bursts. Otherwise, it may ... (but that's okay)
-		 */
-		while ((skb = flow->q->ops->peek(flow->q))) {
-			if (!atm_may_send(flow->vcc, skb->truesize))
-				break;
-
-			skb = qdisc_dequeue_peeked(flow->q);
-			if (unlikely(!skb))
-				break;
-
-			qdisc_bstats_update(sch, skb);
-			bstats_update(&flow->bstats, skb);
-			pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
-			/* remove any LL header somebody else has attached */
-			skb_pull(skb, skb_network_offset(skb));
-			if (skb_headroom(skb) < flow->hdr_len) {
-				struct sk_buff *new;
-
-				new = skb_realloc_headroom(skb, flow->hdr_len);
-				dev_kfree_skb(skb);
-				if (!new)
-					continue;
-				skb = new;
-			}
-			pr_debug("sch_atm_dequeue: ip %p, data %p\n",
-				 skb_network_header(skb), skb->data);
-			ATM_SKB(skb)->vcc = flow->vcc;
-			memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
-			       flow->hdr_len);
-			refcount_add(skb->truesize,
-				   &sk_atm(flow->vcc)->sk_wmem_alloc);
-			/* atm.atm_options are already set by atm_tc_enqueue */
-			flow->vcc->send(flow->vcc, skb);
-		}
-	}
-}
-
-static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct sk_buff *skb;
-
-	pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
-	tasklet_schedule(&p->task);
-	skb = qdisc_dequeue_peeked(p->link.q);
-	if (skb)
-		sch->q.qlen--;
-	return skb;
-}
-
-static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-
-	pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
-
-	return p->link.q->ops->peek(p->link.q);
-}
-
-static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
-		       struct netlink_ext_ack *extack)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	int err;
-
-	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
-	INIT_LIST_HEAD(&p->flows);
-	INIT_LIST_HEAD(&p->link.list);
-	gnet_stats_basic_sync_init(&p->link.bstats);
-	list_add(&p->link.list, &p->flows);
-	p->link.q = qdisc_create_dflt(sch->dev_queue,
-				      &pfifo_qdisc_ops, sch->handle, extack);
-	if (!p->link.q)
-		p->link.q = &noop_qdisc;
-	pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
-	p->link.vcc = NULL;
-	p->link.sock = NULL;
-	p->link.common.classid = sch->handle;
-	p->link.ref = 1;
-
-	err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
-			    extack);
-	if (err)
-		return err;
-
-	tasklet_setup(&p->task, sch_atm_dequeue);
-	return 0;
-}
-
-static void atm_tc_reset(struct Qdisc *sch)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow;
-
-	pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
-	list_for_each_entry(flow, &p->flows, list)
-		qdisc_reset(flow->q);
-}
-
-static void atm_tc_destroy(struct Qdisc *sch)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow, *tmp;
-
-	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
-	list_for_each_entry(flow, &p->flows, list) {
-		tcf_block_put(flow->block);
-		flow->block = NULL;
-	}
-
-	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
-		if (flow->ref > 1)
-			pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
-		atm_tc_put(sch, (unsigned long)flow);
-	}
-	tasklet_kill(&p->task);
-}
-
-static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
-			     struct sk_buff *skb, struct tcmsg *tcm)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-	struct nlattr *nest;
-
-	pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
-		sch, p, flow, skb, tcm);
-	if (list_empty(&flow->list))
-		return -EINVAL;
-	tcm->tcm_handle = flow->common.classid;
-	tcm->tcm_info = flow->q->handle;
-
-	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
-	if (nest == NULL)
-		goto nla_put_failure;
-
-	if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr))
-		goto nla_put_failure;
-	if (flow->vcc) {
-		struct sockaddr_atmpvc pvc;
-		int state;
-
-		memset(&pvc, 0, sizeof(pvc));
-		pvc.sap_family = AF_ATMPVC;
-		pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
-		pvc.sap_addr.vpi = flow->vcc->vpi;
-		pvc.sap_addr.vci = flow->vcc->vci;
-		if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc))
-			goto nla_put_failure;
-		state = ATM_VF2VS(flow->vcc->flags);
-		if (nla_put_u32(skb, TCA_ATM_STATE, state))
-			goto nla_put_failure;
-	}
-	if (flow->excess) {
-		if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
-			goto nla_put_failure;
-	} else {
-		if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
-			goto nla_put_failure;
-	}
-	return nla_nest_end(skb, nest);
-
-nla_put_failure:
-	nla_nest_cancel(skb, nest);
-	return -1;
-}
-static int
-atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
-			struct gnet_dump *d)
-{
-	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
-	if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
-		return -1;
-
-	return 0;
-}
-
-static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
-	return 0;
-}
-
-static const struct Qdisc_class_ops atm_class_ops = {
-	.graft		= atm_tc_graft,
-	.leaf		= atm_tc_leaf,
-	.find		= atm_tc_find,
-	.change		= atm_tc_change,
-	.delete		= atm_tc_delete,
-	.walk		= atm_tc_walk,
-	.tcf_block	= atm_tc_tcf_block,
-	.bind_tcf	= atm_tc_bind_filter,
-	.unbind_tcf	= atm_tc_put,
-	.dump		= atm_tc_dump_class,
-	.dump_stats	= atm_tc_dump_class_stats,
-};
-
-static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
-	.cl_ops		= &atm_class_ops,
-	.id		= "atm",
-	.priv_size	= sizeof(struct atm_qdisc_data),
-	.enqueue	= atm_tc_enqueue,
-	.dequeue	= atm_tc_dequeue,
-	.peek		= atm_tc_peek,
-	.init		= atm_tc_init,
-	.reset		= atm_tc_reset,
-	.destroy	= atm_tc_destroy,
-	.dump		= atm_tc_dump,
-	.owner		= THIS_MODULE,
-};
-
-static int __init atm_init(void)
-{
-	return register_qdisc(&atm_qdisc_ops);
-}
-
-static void __exit atm_exit(void)
-{
-	unregister_qdisc(&atm_qdisc_ops);
-}
-
-module_init(atm_init)
-module_exit(atm_exit)
-MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json
deleted file mode 100644
index f5bc8670a67d..000000000000
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json
+++ /dev/null
@@ -1,94 +0,0 @@
-[
-    {
-        "id": "7628",
-        "name": "Create ATM with default setting",
-        "category": [
-            "qdisc",
-            "atm"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc atm 1: root refcnt",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "390a",
-        "name": "Delete ATM with valid handle",
-        "category": [
-            "qdisc",
-            "atm"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true",
-            "$TC qdisc add dev $DUMMY handle 1: root atm"
-        ],
-        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc atm 1: root refcnt",
-        "matchCount": "0",
-        "teardown": [
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "32a0",
-        "name": "Show ATM class",
-        "category": [
-            "qdisc",
-            "atm"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
-        "expExitCode": "0",
-        "verifyCmd": "$TC class show dev $DUMMY",
-        "matchPattern": "class atm 1: parent 1:",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "6310",
-        "name": "Dump ATM stats",
-        "category": [
-            "qdisc",
-            "atm"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
-        "expExitCode": "0",
-        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
-        "matchPattern": "qdisc atm 1: root refcnt",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    }
-]
-- 
cgit v1.2.3


From bbe77c14ee6185a61ba6d5e435c1cbb489d2a9ed Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 14 Feb 2023 08:49:13 -0500
Subject: net/sched: Retire dsmark qdisc

The dsmark qdisc has served us well over the years for diffserv but has not
been getting much attention due to other more popular approaches to do diffserv
services. Most recently it has become a shooting target for syzkaller. For this
reason, we are retiring it.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/sched/Kconfig                                  |  11 -
 net/sched/Makefile                                 |   1 -
 net/sched/sch_dsmark.c                             | 518 ---------------------
 .../tc-testing/tc-tests/qdiscs/dsmark.json         | 140 ------
 4 files changed, 670 deletions(-)
 delete mode 100644 net/sched/sch_dsmark.c
 delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json

(limited to 'tools/testing')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 869321db12f7..d909f289a67f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -192,17 +192,6 @@ config NET_SCH_GRED
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_gred.
 
-config NET_SCH_DSMARK
-	tristate "Differentiated Services marker (DSMARK)"
-	help
-	  Say Y if you want to schedule packets according to the
-	  Differentiated Services architecture proposed in RFC 2475.
-	  Technical information on this method, with pointers to associated
-	  RFCs, is available at <http://www.gta.ufrj.br/diffserv/>.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called sch_dsmark.
-
 config NET_SCH_NETEM
 	tristate "Network emulator (NETEM)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index d2612b47530c..0852e989af96 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_NET_SCH_HFSC)	+= sch_hfsc.o
 obj-$(CONFIG_NET_SCH_RED)	+= sch_red.o
 obj-$(CONFIG_NET_SCH_GRED)	+= sch_gred.o
 obj-$(CONFIG_NET_SCH_INGRESS)	+= sch_ingress.o
-obj-$(CONFIG_NET_SCH_DSMARK)	+= sch_dsmark.o
 obj-$(CONFIG_NET_SCH_SFB)	+= sch_sfb.o
 obj-$(CONFIG_NET_SCH_SFQ)	+= sch_sfq.o
 obj-$(CONFIG_NET_SCH_TBF)	+= sch_tbf.o
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
deleted file mode 100644
index 401ffaf87d62..000000000000
--- a/net/sched/sch_dsmark.c
+++ /dev/null
@@ -1,518 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_dsmark.c - Differentiated Services field marker */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/bitops.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <asm/byteorder.h>
-
-/*
- * classid	class		marking
- * -------	-----		-------
- *   n/a	  0		n/a
- *   x:0	  1		use entry [0]
- *   ...	 ...		...
- *   x:y y>0	 y+1		use entry [y]
- *   ...	 ...		...
- * x:indices-1	indices		use entry [indices-1]
- *   ...	 ...		...
- *   x:y	 y+1		use entry [y & (indices-1)]
- *   ...	 ...		...
- * 0xffff	0x10000		use entry [indices-1]
- */
-
-
-#define NO_DEFAULT_INDEX	(1 << 16)
-
-struct mask_value {
-	u8			mask;
-	u8			value;
-};
-
-struct dsmark_qdisc_data {
-	struct Qdisc		*q;
-	struct tcf_proto __rcu	*filter_list;
-	struct tcf_block	*block;
-	struct mask_value	*mv;
-	u16			indices;
-	u8			set_tc_index;
-	u32			default_index;	/* index range is 0...0xffff */
-#define DSMARK_EMBEDDED_SZ	16
-	struct mask_value	embedded[DSMARK_EMBEDDED_SZ];
-};
-
-static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
-{
-	return index <= p->indices && index > 0;
-}
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
-			struct Qdisc *new, struct Qdisc **old,
-			struct netlink_ext_ack *extack)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
-	pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n",
-		 __func__, sch, p, new, old);
-
-	if (new == NULL) {
-		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
-					sch->handle, NULL);
-		if (new == NULL)
-			new = &noop_qdisc;
-	}
-
-	*old = qdisc_replace(sch, new, &p->q);
-	return 0;
-}
-
-static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	return p->q;
-}
-
-static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
-{
-	return TC_H_MIN(classid) + 1;
-}
-
-static unsigned long dsmark_bind_filter(struct Qdisc *sch,
-					unsigned long parent, u32 classid)
-{
-	pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
-		 __func__, sch, qdisc_priv(sch), classid);
-
-	return dsmark_find(sch, classid);
-}
-
-static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
-{
-}
-
-static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
-	[TCA_DSMARK_INDICES]		= { .type = NLA_U16 },
-	[TCA_DSMARK_DEFAULT_INDEX]	= { .type = NLA_U16 },
-	[TCA_DSMARK_SET_TC_INDEX]	= { .type = NLA_FLAG },
-	[TCA_DSMARK_MASK]		= { .type = NLA_U8 },
-	[TCA_DSMARK_VALUE]		= { .type = NLA_U8 },
-};
-
-static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
-			 struct nlattr **tca, unsigned long *arg,
-			 struct netlink_ext_ack *extack)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct nlattr *opt = tca[TCA_OPTIONS];
-	struct nlattr *tb[TCA_DSMARK_MAX + 1];
-	int err = -EINVAL;
-
-	pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
-		 __func__, sch, p, classid, parent, *arg);
-
-	if (!dsmark_valid_index(p, *arg)) {
-		err = -ENOENT;
-		goto errout;
-	}
-
-	if (!opt)
-		goto errout;
-
-	err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
-					  dsmark_policy, NULL);
-	if (err < 0)
-		goto errout;
-
-	if (tb[TCA_DSMARK_VALUE])
-		p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]);
-
-	if (tb[TCA_DSMARK_MASK])
-		p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
-
-	err = 0;
-
-errout:
-	return err;
-}
-
-static int dsmark_delete(struct Qdisc *sch, unsigned long arg,
-			 struct netlink_ext_ack *extack)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
-	if (!dsmark_valid_index(p, arg))
-		return -EINVAL;
-
-	p->mv[arg - 1].mask = 0xff;
-	p->mv[arg - 1].value = 0;
-
-	return 0;
-}
-
-static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	int i;
-
-	pr_debug("%s(sch %p,[qdisc %p],walker %p)\n",
-		 __func__, sch, p, walker);
-
-	if (walker->stop)
-		return;
-
-	for (i = 0; i < p->indices; i++) {
-		if (p->mv[i].mask == 0xff && !p->mv[i].value) {
-			walker->count++;
-			continue;
-		}
-		if (!tc_qdisc_stats_dump(sch, i + 1, walker))
-			break;
-	}
-}
-
-static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
-					  struct netlink_ext_ack *extack)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
-	return p->block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-			  struct sk_buff **to_free)
-{
-	unsigned int len = qdisc_pkt_len(skb);
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	int err;
-
-	pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
-
-	if (p->set_tc_index) {
-		int wlen = skb_network_offset(skb);
-
-		switch (skb_protocol(skb, true)) {
-		case htons(ETH_P_IP):
-			wlen += sizeof(struct iphdr);
-			if (!pskb_may_pull(skb, wlen) ||
-			    skb_try_make_writable(skb, wlen))
-				goto drop;
-
-			skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
-				& ~INET_ECN_MASK;
-			break;
-
-		case htons(ETH_P_IPV6):
-			wlen += sizeof(struct ipv6hdr);
-			if (!pskb_may_pull(skb, wlen) ||
-			    skb_try_make_writable(skb, wlen))
-				goto drop;
-
-			skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
-				& ~INET_ECN_MASK;
-			break;
-		default:
-			skb->tc_index = 0;
-			break;
-		}
-	}
-
-	if (TC_H_MAJ(skb->priority) == sch->handle)
-		skb->tc_index = TC_H_MIN(skb->priority);
-	else {
-		struct tcf_result res;
-		struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
-		int result = tcf_classify(skb, NULL, fl, &res, false);
-
-		pr_debug("result %d class 0x%04x\n", result, res.classid);
-
-		switch (result) {
-#ifdef CONFIG_NET_CLS_ACT
-		case TC_ACT_QUEUED:
-		case TC_ACT_STOLEN:
-		case TC_ACT_TRAP:
-			__qdisc_drop(skb, to_free);
-			return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
-
-		case TC_ACT_SHOT:
-			goto drop;
-#endif
-		case TC_ACT_OK:
-			skb->tc_index = TC_H_MIN(res.classid);
-			break;
-
-		default:
-			if (p->default_index != NO_DEFAULT_INDEX)
-				skb->tc_index = p->default_index;
-			break;
-		}
-	}
-
-	err = qdisc_enqueue(skb, p->q, to_free);
-	if (err != NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(err))
-			qdisc_qstats_drop(sch);
-		return err;
-	}
-
-	sch->qstats.backlog += len;
-	sch->q.qlen++;
-
-	return NET_XMIT_SUCCESS;
-
-drop:
-	qdisc_drop(skb, sch, to_free);
-	return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
-
-static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct sk_buff *skb;
-	u32 index;
-
-	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
-	skb = qdisc_dequeue_peeked(p->q);
-	if (skb == NULL)
-		return NULL;
-
-	qdisc_bstats_update(sch, skb);
-	qdisc_qstats_backlog_dec(sch, skb);
-	sch->q.qlen--;
-
-	index = skb->tc_index & (p->indices - 1);
-	pr_debug("index %d->%d\n", skb->tc_index, index);
-
-	switch (skb_protocol(skb, true)) {
-	case htons(ETH_P_IP):
-		ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
-				    p->mv[index].value);
-			break;
-	case htons(ETH_P_IPV6):
-		ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask,
-				    p->mv[index].value);
-			break;
-	default:
-		/*
-		 * Only complain if a change was actually attempted.
-		 * This way, we can send non-IP traffic through dsmark
-		 * and don't need yet another qdisc as a bypass.
-		 */
-		if (p->mv[index].mask != 0xff || p->mv[index].value)
-			pr_warn("%s: unsupported protocol %d\n",
-				__func__, ntohs(skb_protocol(skb, true)));
-		break;
-	}
-
-	return skb;
-}
-
-static struct sk_buff *dsmark_peek(struct Qdisc *sch)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
-	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
-	return p->q->ops->peek(p->q);
-}
-
-static int dsmark_init(struct Qdisc *sch, struct nlattr *opt,
-		       struct netlink_ext_ack *extack)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct nlattr *tb[TCA_DSMARK_MAX + 1];
-	int err = -EINVAL;
-	u32 default_index = NO_DEFAULT_INDEX;
-	u16 indices;
-	int i;
-
-	pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
-
-	if (!opt)
-		goto errout;
-
-	err = tcf_block_get(&p->block, &p->filter_list, sch, extack);
-	if (err)
-		return err;
-
-	err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
-					  dsmark_policy, NULL);
-	if (err < 0)
-		goto errout;
-
-	err = -EINVAL;
-	if (!tb[TCA_DSMARK_INDICES])
-		goto errout;
-	indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
-
-	if (hweight32(indices) != 1)
-		goto errout;
-
-	if (tb[TCA_DSMARK_DEFAULT_INDEX])
-		default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
-
-	if (indices <= DSMARK_EMBEDDED_SZ)
-		p->mv = p->embedded;
-	else
-		p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL);
-	if (!p->mv) {
-		err = -ENOMEM;
-		goto errout;
-	}
-	for (i = 0; i < indices; i++) {
-		p->mv[i].mask = 0xff;
-		p->mv[i].value = 0;
-	}
-	p->indices = indices;
-	p->default_index = default_index;
-	p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
-
-	p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle,
-				 NULL);
-	if (p->q == NULL)
-		p->q = &noop_qdisc;
-	else
-		qdisc_hash_add(p->q, true);
-
-	pr_debug("%s: qdisc %p\n", __func__, p->q);
-
-	err = 0;
-errout:
-	return err;
-}
-
-static void dsmark_reset(struct Qdisc *sch)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
-	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-	if (p->q)
-		qdisc_reset(p->q);
-}
-
-static void dsmark_destroy(struct Qdisc *sch)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
-	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
-	tcf_block_put(p->block);
-	qdisc_put(p->q);
-	if (p->mv != p->embedded)
-		kfree(p->mv);
-}
-
-static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
-			     struct sk_buff *skb, struct tcmsg *tcm)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct nlattr *opts = NULL;
-
-	pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl);
-
-	if (!dsmark_valid_index(p, cl))
-		return -EINVAL;
-
-	tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
-	tcm->tcm_info = p->q->handle;
-
-	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
-	if (opts == NULL)
-		goto nla_put_failure;
-	if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
-	    nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value))
-		goto nla_put_failure;
-
-	return nla_nest_end(skb, opts);
-
-nla_put_failure:
-	nla_nest_cancel(skb, opts);
-	return -EMSGSIZE;
-}
-
-static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	struct nlattr *opts = NULL;
-
-	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
-	if (opts == NULL)
-		goto nla_put_failure;
-	if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices))
-		goto nla_put_failure;
-
-	if (p->default_index != NO_DEFAULT_INDEX &&
-	    nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index))
-		goto nla_put_failure;
-
-	if (p->set_tc_index &&
-	    nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX))
-		goto nla_put_failure;
-
-	return nla_nest_end(skb, opts);
-
-nla_put_failure:
-	nla_nest_cancel(skb, opts);
-	return -EMSGSIZE;
-}
-
-static const struct Qdisc_class_ops dsmark_class_ops = {
-	.graft		=	dsmark_graft,
-	.leaf		=	dsmark_leaf,
-	.find		=	dsmark_find,
-	.change		=	dsmark_change,
-	.delete		=	dsmark_delete,
-	.walk		=	dsmark_walk,
-	.tcf_block	=	dsmark_tcf_block,
-	.bind_tcf	=	dsmark_bind_filter,
-	.unbind_tcf	=	dsmark_unbind_filter,
-	.dump		=	dsmark_dump_class,
-};
-
-static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
-	.next		=	NULL,
-	.cl_ops		=	&dsmark_class_ops,
-	.id		=	"dsmark",
-	.priv_size	=	sizeof(struct dsmark_qdisc_data),
-	.enqueue	=	dsmark_enqueue,
-	.dequeue	=	dsmark_dequeue,
-	.peek		=	dsmark_peek,
-	.init		=	dsmark_init,
-	.reset		=	dsmark_reset,
-	.destroy	=	dsmark_destroy,
-	.change		=	NULL,
-	.dump		=	dsmark_dump,
-	.owner		=	THIS_MODULE,
-};
-
-static int __init dsmark_module_init(void)
-{
-	return register_qdisc(&dsmark_qdisc_ops);
-}
-
-static void __exit dsmark_module_exit(void)
-{
-	unregister_qdisc(&dsmark_qdisc_ops);
-}
-
-module_init(dsmark_module_init)
-module_exit(dsmark_module_exit)
-
-MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json
deleted file mode 100644
index c030795f9c37..000000000000
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json
+++ /dev/null
@@ -1,140 +0,0 @@
-[
-    {
-        "id": "6345",
-        "name": "Create DSMARK with default setting",
-        "category": [
-            "qdisc",
-            "dsmark"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "3462",
-        "name": "Create DSMARK with default_index setting",
-        "category": [
-            "qdisc",
-            "dsmark"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "ca95",
-        "name": "Create DSMARK with set_tc_index flag",
-        "category": [
-            "qdisc",
-            "dsmark"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "a950",
-        "name": "Create DSMARK with multiple setting",
-        "category": [
-            "qdisc",
-            "dsmark"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "4092",
-        "name": "Delete DSMARK with handle",
-        "category": [
-            "qdisc",
-            "dsmark"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true",
-            "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024"
-        ],
-        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
-        "expExitCode": "0",
-        "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400",
-        "matchCount": "0",
-        "teardown": [
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    },
-    {
-        "id": "5930",
-        "name": "Show DSMARK class",
-        "category": [
-            "qdisc",
-            "dsmark"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$IP link add dev $DUMMY type dummy || /bin/true"
-        ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024",
-        "expExitCode": "0",
-        "verifyCmd": "$TC class show dev $DUMMY",
-        "matchPattern": "class dsmark 1:",
-        "matchCount": "0",
-        "teardown": [
-            "$TC qdisc del dev $DUMMY handle 1: root",
-            "$IP link del dev $DUMMY type dummy"
-        ]
-    }
-]
-- 
cgit v1.2.3


From 8c710f75256bb3cf05ac7b1672c82b92c43f3d28 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 14 Feb 2023 08:49:14 -0500
Subject: net/sched: Retire tcindex classifier

The tcindex classifier has served us well for about a quarter of a century
but has not been getting much TLC due to lack of known users. Most recently
it has become easy prey to syzkaller. For this reason, we are retiring it.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tc_wrapper.h                           |   5 -
 net/sched/Kconfig                                  |  11 -
 net/sched/Makefile                                 |   1 -
 net/sched/cls_tcindex.c                            | 716 ---------------------
 .../tc-testing/tc-tests/filters/tcindex.json       | 227 -------
 5 files changed, 960 deletions(-)
 delete mode 100644 net/sched/cls_tcindex.c
 delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json

(limited to 'tools/testing')

diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index d323fffb839a..8ba241760d0a 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -154,7 +154,6 @@ TC_INDIRECT_FILTER_DECLARE(mall_classify);
 TC_INDIRECT_FILTER_DECLARE(route4_classify);
 TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
 TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
-TC_INDIRECT_FILTER_DECLARE(tcindex_classify);
 TC_INDIRECT_FILTER_DECLARE(u32_classify);
 
 static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -207,10 +206,6 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (tp->classify == rsvp6_classify)
 		return rsvp6_classify(skb, tp, res);
 #endif
-#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX)
-	if (tp->classify == tcindex_classify)
-		return tcindex_classify(skb, tp, res);
-#endif
 
 skip:
 	return tp->classify(skb, tp, res);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index d909f289a67f..111883853f08 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -468,17 +468,6 @@ config NET_CLS_BASIC
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_basic.
 
-config NET_CLS_TCINDEX
-	tristate "Traffic-Control Index (TCINDEX)"
-	select NET_CLS
-	help
-	  Say Y here if you want to be able to classify packets based on
-	  traffic control indices. You will want this feature if you want
-	  to implement Differentiated Services together with DSMARK.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called cls_tcindex.
-
 config NET_CLS_ROUTE4
 	tristate "Routing decision (ROUTE)"
 	depends on INET
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 0852e989af96..ea236d258c16 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -68,7 +68,6 @@ obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
 obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
-obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
deleted file mode 100644
index ee2a050c887b..000000000000
--- a/net/sched/cls_tcindex.c
+++ /dev/null
@@ -1,716 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/sched/cls_tcindex.c	Packet classifier for skb->tc_index
- *
- * Written 1998,1999 by Werner Almesberger, EPFL ICA
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/refcount.h>
-#include <net/act_api.h>
-#include <net/netlink.h>
-#include <net/pkt_cls.h>
-#include <net/sch_generic.h>
-#include <net/tc_wrapper.h>
-
-/*
- * Passing parameters to the root seems to be done more awkwardly than really
- * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
- * verified. FIXME.
- */
-
-#define PERFECT_HASH_THRESHOLD	64	/* use perfect hash if not bigger */
-#define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */
-
-
-struct tcindex_data;
-
-struct tcindex_filter_result {
-	struct tcf_exts		exts;
-	struct tcf_result	res;
-	struct tcindex_data	*p;
-	struct rcu_work		rwork;
-};
-
-struct tcindex_filter {
-	u16 key;
-	struct tcindex_filter_result result;
-	struct tcindex_filter __rcu *next;
-	struct rcu_work rwork;
-};
-
-
-struct tcindex_data {
-	struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
-	struct tcindex_filter __rcu **h; /* imperfect hash; */
-	struct tcf_proto *tp;
-	u16 mask;		/* AND key with mask */
-	u32 shift;		/* shift ANDed key to the right */
-	u32 hash;		/* hash table size; 0 if undefined */
-	u32 alloc_hash;		/* allocated size */
-	u32 fall_through;	/* 0: only classify if explicit match */
-	refcount_t refcnt;	/* a temporary refcnt for perfect hash */
-	struct rcu_work rwork;
-};
-
-static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
-{
-	return tcf_exts_has_actions(&r->exts) || r->res.classid;
-}
-
-static void tcindex_data_get(struct tcindex_data *p)
-{
-	refcount_inc(&p->refcnt);
-}
-
-static void tcindex_data_put(struct tcindex_data *p)
-{
-	if (refcount_dec_and_test(&p->refcnt)) {
-		kfree(p->perfect);
-		kfree(p->h);
-		kfree(p);
-	}
-}
-
-static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
-						    u16 key)
-{
-	if (p->perfect) {
-		struct tcindex_filter_result *f = p->perfect + key;
-
-		return tcindex_filter_is_set(f) ? f : NULL;
-	} else if (p->h) {
-		struct tcindex_filter __rcu **fp;
-		struct tcindex_filter *f;
-
-		fp = &p->h[key % p->hash];
-		for (f = rcu_dereference_bh_rtnl(*fp);
-		     f;
-		     fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
-			if (f->key == key)
-				return &f->result;
-	}
-
-	return NULL;
-}
-
-TC_INDIRECT_SCOPE int tcindex_classify(struct sk_buff *skb,
-				       const struct tcf_proto *tp,
-				       struct tcf_result *res)
-{
-	struct tcindex_data *p = rcu_dereference_bh(tp->root);
-	struct tcindex_filter_result *f;
-	int key = (skb->tc_index & p->mask) >> p->shift;
-
-	pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
-		 skb, tp, res, p);
-
-	f = tcindex_lookup(p, key);
-	if (!f) {
-		struct Qdisc *q = tcf_block_q(tp->chain->block);
-
-		if (!p->fall_through)
-			return -1;
-		res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
-		res->class = 0;
-		pr_debug("alg 0x%x\n", res->classid);
-		return 0;
-	}
-	*res = f->res;
-	pr_debug("map 0x%x\n", res->classid);
-
-	return tcf_exts_exec(skb, &f->exts, res);
-}
-
-
-static void *tcindex_get(struct tcf_proto *tp, u32 handle)
-{
-	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r;
-
-	pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
-	if (p->perfect && handle >= p->alloc_hash)
-		return NULL;
-	r = tcindex_lookup(p, handle);
-	return r && tcindex_filter_is_set(r) ? r : NULL;
-}
-
-static int tcindex_init(struct tcf_proto *tp)
-{
-	struct tcindex_data *p;
-
-	pr_debug("tcindex_init(tp %p)\n", tp);
-	p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	p->mask = 0xffff;
-	p->hash = DEFAULT_HASH_SIZE;
-	p->fall_through = 1;
-	refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
-
-	rcu_assign_pointer(tp->root, p);
-	return 0;
-}
-
-static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
-{
-	tcf_exts_destroy(&r->exts);
-	tcf_exts_put_net(&r->exts);
-	tcindex_data_put(r->p);
-}
-
-static void tcindex_destroy_rexts_work(struct work_struct *work)
-{
-	struct tcindex_filter_result *r;
-
-	r = container_of(to_rcu_work(work),
-			 struct tcindex_filter_result,
-			 rwork);
-	rtnl_lock();
-	__tcindex_destroy_rexts(r);
-	rtnl_unlock();
-}
-
-static void __tcindex_destroy_fexts(struct tcindex_filter *f)
-{
-	tcf_exts_destroy(&f->result.exts);
-	tcf_exts_put_net(&f->result.exts);
-	kfree(f);
-}
-
-static void tcindex_destroy_fexts_work(struct work_struct *work)
-{
-	struct tcindex_filter *f = container_of(to_rcu_work(work),
-						struct tcindex_filter,
-						rwork);
-
-	rtnl_lock();
-	__tcindex_destroy_fexts(f);
-	rtnl_unlock();
-}
-
-static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
-			  bool rtnl_held, struct netlink_ext_ack *extack)
-{
-	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = arg;
-	struct tcindex_filter __rcu **walk;
-	struct tcindex_filter *f = NULL;
-
-	pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
-	if (p->perfect) {
-		if (!r->res.class)
-			return -ENOENT;
-	} else {
-		int i;
-
-		for (i = 0; i < p->hash; i++) {
-			walk = p->h + i;
-			for (f = rtnl_dereference(*walk); f;
-			     walk = &f->next, f = rtnl_dereference(*walk)) {
-				if (&f->result == r)
-					goto found;
-			}
-		}
-		return -ENOENT;
-
-found:
-		rcu_assign_pointer(*walk, rtnl_dereference(f->next));
-	}
-	tcf_unbind_filter(tp, &r->res);
-	/* all classifiers are required to call tcf_exts_destroy() after rcu
-	 * grace period, since converted-to-rcu actions are relying on that
-	 * in cleanup() callback
-	 */
-	if (f) {
-		if (tcf_exts_get_net(&f->result.exts))
-			tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
-		else
-			__tcindex_destroy_fexts(f);
-	} else {
-		tcindex_data_get(p);
-
-		if (tcf_exts_get_net(&r->exts))
-			tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
-		else
-			__tcindex_destroy_rexts(r);
-	}
-
-	*last = false;
-	return 0;
-}
-
-static void tcindex_destroy_work(struct work_struct *work)
-{
-	struct tcindex_data *p = container_of(to_rcu_work(work),
-					      struct tcindex_data,
-					      rwork);
-
-	tcindex_data_put(p);
-}
-
-static inline int
-valid_perfect_hash(struct tcindex_data *p)
-{
-	return  p->hash > (p->mask >> p->shift);
-}
-
-static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
-	[TCA_TCINDEX_HASH]		= { .type = NLA_U32 },
-	[TCA_TCINDEX_MASK]		= { .type = NLA_U16 },
-	[TCA_TCINDEX_SHIFT]		= { .type = NLA_U32 },
-	[TCA_TCINDEX_FALL_THROUGH]	= { .type = NLA_U32 },
-	[TCA_TCINDEX_CLASSID]		= { .type = NLA_U32 },
-};
-
-static int tcindex_filter_result_init(struct tcindex_filter_result *r,
-				      struct tcindex_data *p,
-				      struct net *net)
-{
-	memset(r, 0, sizeof(*r));
-	r->p = p;
-	return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
-			     TCA_TCINDEX_POLICE);
-}
-
-static void tcindex_free_perfect_hash(struct tcindex_data *cp);
-
-static void tcindex_partial_destroy_work(struct work_struct *work)
-{
-	struct tcindex_data *p = container_of(to_rcu_work(work),
-					      struct tcindex_data,
-					      rwork);
-
-	rtnl_lock();
-	if (p->perfect)
-		tcindex_free_perfect_hash(p);
-	kfree(p);
-	rtnl_unlock();
-}
-
-static void tcindex_free_perfect_hash(struct tcindex_data *cp)
-{
-	int i;
-
-	for (i = 0; i < cp->hash; i++)
-		tcf_exts_destroy(&cp->perfect[i].exts);
-	kfree(cp->perfect);
-}
-
-static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
-{
-	int i, err = 0;
-
-	cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
-			      GFP_KERNEL | __GFP_NOWARN);
-	if (!cp->perfect)
-		return -ENOMEM;
-
-	for (i = 0; i < cp->hash; i++) {
-		err = tcf_exts_init(&cp->perfect[i].exts, net,
-				    TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
-		if (err < 0)
-			goto errout;
-		cp->perfect[i].p = cp;
-	}
-
-	return 0;
-
-errout:
-	tcindex_free_perfect_hash(cp);
-	return err;
-}
-
-static int
-tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
-		  u32 handle, struct tcindex_data *p,
-		  struct tcindex_filter_result *r, struct nlattr **tb,
-		  struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
-{
-	struct tcindex_filter_result new_filter_result;
-	struct tcindex_data *cp = NULL, *oldp;
-	struct tcindex_filter *f = NULL; /* make gcc behave */
-	struct tcf_result cr = {};
-	int err, balloc = 0;
-	struct tcf_exts e;
-
-	err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
-	if (err < 0)
-		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
-	if (err < 0)
-		goto errout;
-
-	err = -ENOMEM;
-	/* tcindex_data attributes must look atomic to classifier/lookup so
-	 * allocate new tcindex data and RCU assign it onto root. Keeping
-	 * perfect hash and hash pointers from old data.
-	 */
-	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
-	if (!cp)
-		goto errout;
-
-	cp->mask = p->mask;
-	cp->shift = p->shift;
-	cp->hash = p->hash;
-	cp->alloc_hash = p->alloc_hash;
-	cp->fall_through = p->fall_through;
-	cp->tp = tp;
-	refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
-
-	if (tb[TCA_TCINDEX_HASH])
-		cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
-	if (tb[TCA_TCINDEX_MASK])
-		cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
-	if (tb[TCA_TCINDEX_SHIFT]) {
-		cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-		if (cp->shift > 16) {
-			err = -EINVAL;
-			goto errout;
-		}
-	}
-	if (!cp->hash) {
-		/* Hash not specified, use perfect hash if the upper limit
-		 * of the hashing index is below the threshold.
-		 */
-		if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
-			cp->hash = (cp->mask >> cp->shift) + 1;
-		else
-			cp->hash = DEFAULT_HASH_SIZE;
-	}
-
-	if (p->perfect) {
-		int i;
-
-		if (tcindex_alloc_perfect_hash(net, cp) < 0)
-			goto errout;
-		cp->alloc_hash = cp->hash;
-		for (i = 0; i < min(cp->hash, p->hash); i++)
-			cp->perfect[i].res = p->perfect[i].res;
-		balloc = 1;
-	}
-	cp->h = p->h;
-
-	err = tcindex_filter_result_init(&new_filter_result, cp, net);
-	if (err < 0)
-		goto errout_alloc;
-	if (r)
-		cr = r->res;
-
-	err = -EBUSY;
-
-	/* Hash already allocated, make sure that we still meet the
-	 * requirements for the allocated hash.
-	 */
-	if (cp->perfect) {
-		if (!valid_perfect_hash(cp) ||
-		    cp->hash > cp->alloc_hash)
-			goto errout_alloc;
-	} else if (cp->h && cp->hash != cp->alloc_hash) {
-		goto errout_alloc;
-	}
-
-	err = -EINVAL;
-	if (tb[TCA_TCINDEX_FALL_THROUGH])
-		cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
-
-	if (!cp->perfect && !cp->h)
-		cp->alloc_hash = cp->hash;
-
-	/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
-	 * but then, we'd fail handles that may become valid after some future
-	 * mask change. While this is extremely unlikely to ever matter,
-	 * the check below is safer (and also more backwards-compatible).
-	 */
-	if (cp->perfect || valid_perfect_hash(cp))
-		if (handle >= cp->alloc_hash)
-			goto errout_alloc;
-
-
-	err = -ENOMEM;
-	if (!cp->perfect && !cp->h) {
-		if (valid_perfect_hash(cp)) {
-			if (tcindex_alloc_perfect_hash(net, cp) < 0)
-				goto errout_alloc;
-			balloc = 1;
-		} else {
-			struct tcindex_filter __rcu **hash;
-
-			hash = kcalloc(cp->hash,
-				       sizeof(struct tcindex_filter *),
-				       GFP_KERNEL);
-
-			if (!hash)
-				goto errout_alloc;
-
-			cp->h = hash;
-			balloc = 2;
-		}
-	}
-
-	if (cp->perfect)
-		r = cp->perfect + handle;
-	else
-		r = tcindex_lookup(cp, handle) ? : &new_filter_result;
-
-	if (r == &new_filter_result) {
-		f = kzalloc(sizeof(*f), GFP_KERNEL);
-		if (!f)
-			goto errout_alloc;
-		f->key = handle;
-		f->next = NULL;
-		err = tcindex_filter_result_init(&f->result, cp, net);
-		if (err < 0) {
-			kfree(f);
-			goto errout_alloc;
-		}
-	}
-
-	if (tb[TCA_TCINDEX_CLASSID]) {
-		cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
-		tcf_bind_filter(tp, &cr, base);
-	}
-
-	oldp = p;
-	r->res = cr;
-	tcf_exts_change(&r->exts, &e);
-
-	rcu_assign_pointer(tp->root, cp);
-
-	if (r == &new_filter_result) {
-		struct tcindex_filter *nfp;
-		struct tcindex_filter __rcu **fp;
-
-		f->result.res = r->res;
-		tcf_exts_change(&f->result.exts, &r->exts);
-
-		fp = cp->h + (handle % cp->hash);
-		for (nfp = rtnl_dereference(*fp);
-		     nfp;
-		     fp = &nfp->next, nfp = rtnl_dereference(*fp))
-				; /* nothing */
-
-		rcu_assign_pointer(*fp, f);
-	} else {
-		tcf_exts_destroy(&new_filter_result.exts);
-	}
-
-	if (oldp)
-		tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
-	return 0;
-
-errout_alloc:
-	if (balloc == 1)
-		tcindex_free_perfect_hash(cp);
-	else if (balloc == 2)
-		kfree(cp->h);
-	tcf_exts_destroy(&new_filter_result.exts);
-errout:
-	kfree(cp);
-	tcf_exts_destroy(&e);
-	return err;
-}
-
-static int
-tcindex_change(struct net *net, struct sk_buff *in_skb,
-	       struct tcf_proto *tp, unsigned long base, u32 handle,
-	       struct nlattr **tca, void **arg, u32 flags,
-	       struct netlink_ext_ack *extack)
-{
-	struct nlattr *opt = tca[TCA_OPTIONS];
-	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
-	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = *arg;
-	int err;
-
-	pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
-	    "p %p,r %p,*arg %p\n",
-	    tp, handle, tca, arg, opt, p, r, *arg);
-
-	if (!opt)
-		return 0;
-
-	err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt,
-					  tcindex_policy, NULL);
-	if (err < 0)
-		return err;
-
-	return tcindex_set_parms(net, tp, base, handle, p, r, tb,
-				 tca[TCA_RATE], flags, extack);
-}
-
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
-			 bool rtnl_held)
-{
-	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter *f, *next;
-	int i;
-
-	pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
-	if (p->perfect) {
-		for (i = 0; i < p->hash; i++) {
-			if (!p->perfect[i].res.class)
-				continue;
-			if (!tc_cls_stats_dump(tp, walker, p->perfect + i))
-				return;
-		}
-	}
-	if (!p->h)
-		return;
-	for (i = 0; i < p->hash; i++) {
-		for (f = rtnl_dereference(p->h[i]); f; f = next) {
-			next = rtnl_dereference(f->next);
-			if (!tc_cls_stats_dump(tp, walker, &f->result))
-				return;
-		}
-	}
-}
-
-static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
-			    struct netlink_ext_ack *extack)
-{
-	struct tcindex_data *p = rtnl_dereference(tp->root);
-	int i;
-
-	pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
-
-	if (p->perfect) {
-		for (i = 0; i < p->hash; i++) {
-			struct tcindex_filter_result *r = p->perfect + i;
-
-			/* tcf_queue_work() does not guarantee the ordering we
-			 * want, so we have to take this refcnt temporarily to
-			 * ensure 'p' is freed after all tcindex_filter_result
-			 * here. Imperfect hash does not need this, because it
-			 * uses linked lists rather than an array.
-			 */
-			tcindex_data_get(p);
-
-			tcf_unbind_filter(tp, &r->res);
-			if (tcf_exts_get_net(&r->exts))
-				tcf_queue_work(&r->rwork,
-					       tcindex_destroy_rexts_work);
-			else
-				__tcindex_destroy_rexts(r);
-		}
-	}
-
-	for (i = 0; p->h && i < p->hash; i++) {
-		struct tcindex_filter *f, *next;
-		bool last;
-
-		for (f = rtnl_dereference(p->h[i]); f; f = next) {
-			next = rtnl_dereference(f->next);
-			tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
-		}
-	}
-
-	tcf_queue_work(&p->rwork, tcindex_destroy_work);
-}
-
-
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
-			struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
-{
-	struct tcindex_data *p = rtnl_dereference(tp->root);
-	struct tcindex_filter_result *r = fh;
-	struct nlattr *nest;
-
-	pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
-		 tp, fh, skb, t, p, r);
-	pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
-
-	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
-	if (nest == NULL)
-		goto nla_put_failure;
-
-	if (!fh) {
-		t->tcm_handle = ~0; /* whatever ... */
-		if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
-		    nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
-		    nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
-		    nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
-			goto nla_put_failure;
-		nla_nest_end(skb, nest);
-	} else {
-		if (p->perfect) {
-			t->tcm_handle = r - p->perfect;
-		} else {
-			struct tcindex_filter *f;
-			struct tcindex_filter __rcu **fp;
-			int i;
-
-			t->tcm_handle = 0;
-			for (i = 0; !t->tcm_handle && i < p->hash; i++) {
-				fp = &p->h[i];
-				for (f = rtnl_dereference(*fp);
-				     !t->tcm_handle && f;
-				     fp = &f->next, f = rtnl_dereference(*fp)) {
-					if (&f->result == r)
-						t->tcm_handle = f->key;
-				}
-			}
-		}
-		pr_debug("handle = %d\n", t->tcm_handle);
-		if (r->res.class &&
-		    nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
-			goto nla_put_failure;
-
-		if (tcf_exts_dump(skb, &r->exts) < 0)
-			goto nla_put_failure;
-		nla_nest_end(skb, nest);
-
-		if (tcf_exts_dump_stats(skb, &r->exts) < 0)
-			goto nla_put_failure;
-	}
-
-	return skb->len;
-
-nla_put_failure:
-	nla_nest_cancel(skb, nest);
-	return -1;
-}
-
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
-			       void *q, unsigned long base)
-{
-	struct tcindex_filter_result *r = fh;
-
-	tc_cls_bind_class(classid, cl, q, &r->res, base);
-}
-
-static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
-	.kind		=	"tcindex",
-	.classify	=	tcindex_classify,
-	.init		=	tcindex_init,
-	.destroy	=	tcindex_destroy,
-	.get		=	tcindex_get,
-	.change		=	tcindex_change,
-	.delete		=	tcindex_delete,
-	.walk		=	tcindex_walk,
-	.dump		=	tcindex_dump,
-	.bind_class	=	tcindex_bind_class,
-	.owner		=	THIS_MODULE,
-};
-
-static int __init init_tcindex(void)
-{
-	return register_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-static void __exit exit_tcindex(void)
-{
-	unregister_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-module_init(init_tcindex)
-module_exit(exit_tcindex)
-MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json
deleted file mode 100644
index 44901db70376..000000000000
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json
+++ /dev/null
@@ -1,227 +0,0 @@
-[
-    {
-        "id": "8293",
-        "name": "Add tcindex filter with default action",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "^filter parent ffff: protocol ip pref 1 tcindex chain 0 handle 0x0001 classid 1:1",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "7281",
-        "name": "Add tcindex filter with hash size and pass action",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 fall_through classid 1:1 action pass",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "b294",
-        "name": "Add tcindex filter with mask shift and reclassify action",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action reclassify",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action reclassify",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "0532",
-        "name": "Add tcindex filter with pass_on and continue actions",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 pass_on classid 1:1 action continue",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action continue",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "d473",
-        "name": "Add tcindex filter with pipe action",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action pipe",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action pipe",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "2940",
-        "name": "Add tcindex filter with miltiple actions",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 7 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action skbedit mark 7 pipe action gact drop",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 7 protocol ip tcindex",
-        "matchPattern": "^filter parent ffff: protocol ip pref 7 tcindex.*handle 0x0001.*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "1893",
-        "name": "List tcindex filters",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress",
-            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1",
-            "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 1 tcindex classid 1:1"
-        ],
-        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "handle 0x000[0-9]+ classid 1:1",
-        "matchCount": "2",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "2041",
-        "name": "Change tcindex filter with pass action",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress",
-            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop"
-        ],
-        "cmdUnderTest": "$TC filter change dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action pass",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "9203",
-        "name": "Replace tcindex filter with pass action",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress",
-            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop"
-        ],
-        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action pass",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "7957",
-        "name": "Delete tcindex filter with drop action",
-        "category": [
-            "filter",
-            "tcindex"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress",
-            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop"
-        ],
-        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
-        "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action drop",
-        "matchCount": "0",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    }
-]
-- 
cgit v1.2.3


From 265b4da82dbf5df04bee5a5d46b7474b1aaf326a Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Tue, 14 Feb 2023 08:49:15 -0500
Subject: net/sched: Retire rsvp classifier

The rsvp classifier has served us well for about a quarter of a century but has
has not been getting much maintenance attention due to lack of known users.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/tc_wrapper.h                           |  10 -
 net/sched/Kconfig                                  |  28 -
 net/sched/Makefile                                 |   2 -
 net/sched/cls_rsvp.c                               |  26 -
 net/sched/cls_rsvp.h                               | 764 ---------------------
 net/sched/cls_rsvp6.c                              |  26 -
 .../tc-testing/tc-tests/filters/rsvp.json          | 203 ------
 7 files changed, 1059 deletions(-)
 delete mode 100644 net/sched/cls_rsvp.c
 delete mode 100644 net/sched/cls_rsvp.h
 delete mode 100644 net/sched/cls_rsvp6.c
 delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json

(limited to 'tools/testing')

diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index 8ba241760d0a..a6d481b5bcbc 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -152,8 +152,6 @@ TC_INDIRECT_FILTER_DECLARE(flow_classify);
 TC_INDIRECT_FILTER_DECLARE(fw_classify);
 TC_INDIRECT_FILTER_DECLARE(mall_classify);
 TC_INDIRECT_FILTER_DECLARE(route4_classify);
-TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
-TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
 TC_INDIRECT_FILTER_DECLARE(u32_classify);
 
 static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -198,14 +196,6 @@ static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	if (tp->classify == route4_classify)
 		return route4_classify(skb, tp, res);
 #endif
-#if IS_BUILTIN(CONFIG_NET_CLS_RSVP)
-	if (tp->classify == rsvp_classify)
-		return rsvp_classify(skb, tp, res);
-#endif
-#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6)
-	if (tp->classify == rsvp6_classify)
-		return rsvp6_classify(skb, tp, res);
-#endif
 
 skip:
 	return tp->classify(skb, tp, res);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 111883853f08..4b95cb1ac435 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -513,34 +513,6 @@ config CLS_U32_MARK
 	help
 	  Say Y here to be able to use netfilter marks as u32 key.
 
-config NET_CLS_RSVP
-	tristate "IPv4 Resource Reservation Protocol (RSVP)"
-	select NET_CLS
-	help
-	  The Resource Reservation Protocol (RSVP) permits end systems to
-	  request a minimum and maximum data flow rate for a connection; this
-	  is important for real time data such as streaming sound or video.
-
-	  Say Y here if you want to be able to classify outgoing packets based
-	  on their RSVP requests.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called cls_rsvp.
-
-config NET_CLS_RSVP6
-	tristate "IPv6 Resource Reservation Protocol (RSVP6)"
-	select NET_CLS
-	help
-	  The Resource Reservation Protocol (RSVP) permits end systems to
-	  request a minimum and maximum data flow rate for a connection; this
-	  is important for real time data such as streaming sound or video.
-
-	  Say Y here if you want to be able to classify outgoing packets based
-	  on their RSVP requests and you are using the IPv6 protocol.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called cls_rsvp6.
-
 config NET_CLS_FLOW
 	tristate "Flow classifier"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index ea236d258c16..b5fd49641d91 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -67,8 +67,6 @@ obj-$(CONFIG_NET_SCH_TAPRIO)	+= sch_taprio.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
-obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
-obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
deleted file mode 100644
index 03d8619bd9c6..000000000000
--- a/net/sched/cls_rsvp.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp.c	Special RSVP packet classifier for IPv4.
- *
- * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/tc_wrapper.h>
-
-#define RSVP_DST_LEN	1
-#define RSVP_ID		"rsvp"
-#define RSVP_OPS	cls_rsvp_ops
-#define RSVP_CLS	rsvp_classify
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
deleted file mode 100644
index 869efba9f834..000000000000
--- a/net/sched/cls_rsvp.h
+++ /dev/null
@@ -1,764 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
- *
- * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-/*
-   Comparing to general packet classification problem,
-   RSVP needs only several relatively simple rules:
-
-   * (dst, protocol) are always specified,
-     so that we are able to hash them.
-   * src may be exact, or may be wildcard, so that
-     we can keep a hash table plus one wildcard entry.
-   * source port (or flow label) is important only if src is given.
-
-   IMPLEMENTATION.
-
-   We use a two level hash table: The top level is keyed by
-   destination address and protocol ID, every bucket contains a list
-   of "rsvp sessions", identified by destination address, protocol and
-   DPI(="Destination Port ID"): triple (key, mask, offset).
-
-   Every bucket has a smaller hash table keyed by source address
-   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
-   Every bucket is again a list of "RSVP flows", selected by
-   source address and SPI(="Source Port ID" here rather than
-   "security parameter index"): triple (key, mask, offset).
-
-
-   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
-   and all fragmented packets go to the best-effort traffic class.
-
-
-   NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
-   only one "Generalized Port Identifier". So that for classic
-   ah, esp (and udp,tcp) both *pi should coincide or one of them
-   should be wildcard.
-
-   At first sight, this redundancy is just a waste of CPU
-   resources. But DPI and SPI add the possibility to assign different
-   priorities to GPIs. Look also at note 4 about tunnels below.
-
-
-   NOTE 3. One complication is the case of tunneled packets.
-   We implement it as following: if the first lookup
-   matches a special session with "tunnelhdr" value not zero,
-   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
-   In this case, we pull tunnelhdr bytes and restart lookup
-   with tunnel ID added to the list of keys. Simple and stupid 8)8)
-   It's enough for PIMREG and IPIP.
-
-
-   NOTE 4. Two GPIs make it possible to parse even GRE packets.
-   F.e. DPI can select ETH_P_IP (and necessary flags to make
-   tunnelhdr correct) in GRE protocol field and SPI matches
-   GRE key. Is it not nice? 8)8)
-
-
-   Well, as result, despite its simplicity, we get a pretty
-   powerful classification engine.  */
-
-
-struct rsvp_head {
-	u32			tmap[256/32];
-	u32			hgenerator;
-	u8			tgenerator;
-	struct rsvp_session __rcu *ht[256];
-	struct rcu_head		rcu;
-};
-
-struct rsvp_session {
-	struct rsvp_session __rcu	*next;
-	__be32				dst[RSVP_DST_LEN];
-	struct tc_rsvp_gpi		dpi;
-	u8				protocol;
-	u8				tunnelid;
-	/* 16 (src,sport) hash slots, and one wildcard source slot */
-	struct rsvp_filter __rcu	*ht[16 + 1];
-	struct rcu_head			rcu;
-};
-
-
-struct rsvp_filter {
-	struct rsvp_filter __rcu	*next;
-	__be32				src[RSVP_DST_LEN];
-	struct tc_rsvp_gpi		spi;
-	u8				tunnelhdr;
-
-	struct tcf_result		res;
-	struct tcf_exts			exts;
-
-	u32				handle;
-	struct rsvp_session		*sess;
-	struct rcu_work			rwork;
-};
-
-static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
-{
-	unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
-
-	h ^= h>>16;
-	h ^= h>>8;
-	return (h ^ protocol ^ tunnelid) & 0xFF;
-}
-
-static inline unsigned int hash_src(__be32 *src)
-{
-	unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
-
-	h ^= h>>16;
-	h ^= h>>8;
-	h ^= h>>4;
-	return h & 0xF;
-}
-
-#define RSVP_APPLY_RESULT()				\
-{							\
-	int r = tcf_exts_exec(skb, &f->exts, res);	\
-	if (r < 0)					\
-		continue;				\
-	else if (r > 0)					\
-		return r;				\
-}
-
-TC_INDIRECT_SCOPE int RSVP_CLS(struct sk_buff *skb, const struct tcf_proto *tp,
-			       struct tcf_result *res)
-{
-	struct rsvp_head *head = rcu_dereference_bh(tp->root);
-	struct rsvp_session *s;
-	struct rsvp_filter *f;
-	unsigned int h1, h2;
-	__be32 *dst, *src;
-	u8 protocol;
-	u8 tunnelid = 0;
-	u8 *xprt;
-#if RSVP_DST_LEN == 4
-	struct ipv6hdr *nhptr;
-
-	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
-		return -1;
-	nhptr = ipv6_hdr(skb);
-#else
-	struct iphdr *nhptr;
-
-	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
-		return -1;
-	nhptr = ip_hdr(skb);
-#endif
-restart:
-
-#if RSVP_DST_LEN == 4
-	src = &nhptr->saddr.s6_addr32[0];
-	dst = &nhptr->daddr.s6_addr32[0];
-	protocol = nhptr->nexthdr;
-	xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
-#else
-	src = &nhptr->saddr;
-	dst = &nhptr->daddr;
-	protocol = nhptr->protocol;
-	xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
-	if (ip_is_fragment(nhptr))
-		return -1;
-#endif
-
-	h1 = hash_dst(dst, protocol, tunnelid);
-	h2 = hash_src(src);
-
-	for (s = rcu_dereference_bh(head->ht[h1]); s;
-	     s = rcu_dereference_bh(s->next)) {
-		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
-		    protocol == s->protocol &&
-		    !(s->dpi.mask &
-		      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
-#if RSVP_DST_LEN == 4
-		    dst[0] == s->dst[0] &&
-		    dst[1] == s->dst[1] &&
-		    dst[2] == s->dst[2] &&
-#endif
-		    tunnelid == s->tunnelid) {
-
-			for (f = rcu_dereference_bh(s->ht[h2]); f;
-			     f = rcu_dereference_bh(f->next)) {
-				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
-				    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
-#if RSVP_DST_LEN == 4
-				    &&
-				    src[0] == f->src[0] &&
-				    src[1] == f->src[1] &&
-				    src[2] == f->src[2]
-#endif
-				    ) {
-					*res = f->res;
-					RSVP_APPLY_RESULT();
-
-matched:
-					if (f->tunnelhdr == 0)
-						return 0;
-
-					tunnelid = f->res.classid;
-					nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
-					goto restart;
-				}
-			}
-
-			/* And wildcard bucket... */
-			for (f = rcu_dereference_bh(s->ht[16]); f;
-			     f = rcu_dereference_bh(f->next)) {
-				*res = f->res;
-				RSVP_APPLY_RESULT();
-				goto matched;
-			}
-			return -1;
-		}
-	}
-	return -1;
-}
-
-static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
-{
-	struct rsvp_head *head = rtnl_dereference(tp->root);
-	struct rsvp_session *s;
-	struct rsvp_filter __rcu **ins;
-	struct rsvp_filter *pins;
-	unsigned int h1 = h & 0xFF;
-	unsigned int h2 = (h >> 8) & 0xFF;
-
-	for (s = rtnl_dereference(head->ht[h1]); s;
-	     s = rtnl_dereference(s->next)) {
-		for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
-		     ins = &pins->next, pins = rtnl_dereference(*ins)) {
-			if (pins->handle == h) {
-				RCU_INIT_POINTER(n->next, pins->next);
-				rcu_assign_pointer(*ins, n);
-				return;
-			}
-		}
-	}
-
-	/* Something went wrong if we are trying to replace a non-existent
-	 * node. Mind as well halt instead of silently failing.
-	 */
-	BUG_ON(1);
-}
-
-static void *rsvp_get(struct tcf_proto *tp, u32 handle)
-{
-	struct rsvp_head *head = rtnl_dereference(tp->root);
-	struct rsvp_session *s;
-	struct rsvp_filter *f;
-	unsigned int h1 = handle & 0xFF;
-	unsigned int h2 = (handle >> 8) & 0xFF;
-
-	if (h2 > 16)
-		return NULL;
-
-	for (s = rtnl_dereference(head->ht[h1]); s;
-	     s = rtnl_dereference(s->next)) {
-		for (f = rtnl_dereference(s->ht[h2]); f;
-		     f = rtnl_dereference(f->next)) {
-			if (f->handle == handle)
-				return f;
-		}
-	}
-	return NULL;
-}
-
-static int rsvp_init(struct tcf_proto *tp)
-{
-	struct rsvp_head *data;
-
-	data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
-	if (data) {
-		rcu_assign_pointer(tp->root, data);
-		return 0;
-	}
-	return -ENOBUFS;
-}
-
-static void __rsvp_delete_filter(struct rsvp_filter *f)
-{
-	tcf_exts_destroy(&f->exts);
-	tcf_exts_put_net(&f->exts);
-	kfree(f);
-}
-
-static void rsvp_delete_filter_work(struct work_struct *work)
-{
-	struct rsvp_filter *f = container_of(to_rcu_work(work),
-					     struct rsvp_filter,
-					     rwork);
-	rtnl_lock();
-	__rsvp_delete_filter(f);
-	rtnl_unlock();
-}
-
-static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
-{
-	tcf_unbind_filter(tp, &f->res);
-	/* all classifiers are required to call tcf_exts_destroy() after rcu
-	 * grace period, since converted-to-rcu actions are relying on that
-	 * in cleanup() callback
-	 */
-	if (tcf_exts_get_net(&f->exts))
-		tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
-	else
-		__rsvp_delete_filter(f);
-}
-
-static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
-			 struct netlink_ext_ack *extack)
-{
-	struct rsvp_head *data = rtnl_dereference(tp->root);
-	int h1, h2;
-
-	if (data == NULL)
-		return;
-
-	for (h1 = 0; h1 < 256; h1++) {
-		struct rsvp_session *s;
-
-		while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
-			RCU_INIT_POINTER(data->ht[h1], s->next);
-
-			for (h2 = 0; h2 <= 16; h2++) {
-				struct rsvp_filter *f;
-
-				while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
-					rcu_assign_pointer(s->ht[h2], f->next);
-					rsvp_delete_filter(tp, f);
-				}
-			}
-			kfree_rcu(s, rcu);
-		}
-	}
-	kfree_rcu(data, rcu);
-}
-
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
-		       bool rtnl_held, struct netlink_ext_ack *extack)
-{
-	struct rsvp_head *head = rtnl_dereference(tp->root);
-	struct rsvp_filter *nfp, *f = arg;
-	struct rsvp_filter __rcu **fp;
-	unsigned int h = f->handle;
-	struct rsvp_session __rcu **sp;
-	struct rsvp_session *nsp, *s = f->sess;
-	int i, h1;
-
-	fp = &s->ht[(h >> 8) & 0xFF];
-	for (nfp = rtnl_dereference(*fp); nfp;
-	     fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
-		if (nfp == f) {
-			RCU_INIT_POINTER(*fp, f->next);
-			rsvp_delete_filter(tp, f);
-
-			/* Strip tree */
-
-			for (i = 0; i <= 16; i++)
-				if (s->ht[i])
-					goto out;
-
-			/* OK, session has no flows */
-			sp = &head->ht[h & 0xFF];
-			for (nsp = rtnl_dereference(*sp); nsp;
-			     sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
-				if (nsp == s) {
-					RCU_INIT_POINTER(*sp, s->next);
-					kfree_rcu(s, rcu);
-					goto out;
-				}
-			}
-
-			break;
-		}
-	}
-
-out:
-	*last = true;
-	for (h1 = 0; h1 < 256; h1++) {
-		if (rcu_access_pointer(head->ht[h1])) {
-			*last = false;
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
-{
-	struct rsvp_head *data = rtnl_dereference(tp->root);
-	int i = 0xFFFF;
-
-	while (i-- > 0) {
-		u32 h;
-
-		if ((data->hgenerator += 0x10000) == 0)
-			data->hgenerator = 0x10000;
-		h = data->hgenerator|salt;
-		if (!rsvp_get(tp, h))
-			return h;
-	}
-	return 0;
-}
-
-static int tunnel_bts(struct rsvp_head *data)
-{
-	int n = data->tgenerator >> 5;
-	u32 b = 1 << (data->tgenerator & 0x1F);
-
-	if (data->tmap[n] & b)
-		return 0;
-	data->tmap[n] |= b;
-	return 1;
-}
-
-static void tunnel_recycle(struct rsvp_head *data)
-{
-	struct rsvp_session __rcu **sht = data->ht;
-	u32 tmap[256/32];
-	int h1, h2;
-
-	memset(tmap, 0, sizeof(tmap));
-
-	for (h1 = 0; h1 < 256; h1++) {
-		struct rsvp_session *s;
-		for (s = rtnl_dereference(sht[h1]); s;
-		     s = rtnl_dereference(s->next)) {
-			for (h2 = 0; h2 <= 16; h2++) {
-				struct rsvp_filter *f;
-
-				for (f = rtnl_dereference(s->ht[h2]); f;
-				     f = rtnl_dereference(f->next)) {
-					if (f->tunnelhdr == 0)
-						continue;
-					data->tgenerator = f->res.classid;
-					tunnel_bts(data);
-				}
-			}
-		}
-	}
-
-	memcpy(data->tmap, tmap, sizeof(tmap));
-}
-
-static u32 gen_tunnel(struct rsvp_head *data)
-{
-	int i, k;
-
-	for (k = 0; k < 2; k++) {
-		for (i = 255; i > 0; i--) {
-			if (++data->tgenerator == 0)
-				data->tgenerator = 1;
-			if (tunnel_bts(data))
-				return data->tgenerator;
-		}
-		tunnel_recycle(data);
-	}
-	return 0;
-}
-
-static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
-	[TCA_RSVP_CLASSID]	= { .type = NLA_U32 },
-	[TCA_RSVP_DST]		= { .len = RSVP_DST_LEN * sizeof(u32) },
-	[TCA_RSVP_SRC]		= { .len = RSVP_DST_LEN * sizeof(u32) },
-	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
-};
-
-static int rsvp_change(struct net *net, struct sk_buff *in_skb,
-		       struct tcf_proto *tp, unsigned long base,
-		       u32 handle, struct nlattr **tca,
-		       void **arg, u32 flags,
-		       struct netlink_ext_ack *extack)
-{
-	struct rsvp_head *data = rtnl_dereference(tp->root);
-	struct rsvp_filter *f, *nfp;
-	struct rsvp_filter __rcu **fp;
-	struct rsvp_session *nsp, *s;
-	struct rsvp_session __rcu **sp;
-	struct tc_rsvp_pinfo *pinfo = NULL;
-	struct nlattr *opt = tca[TCA_OPTIONS];
-	struct nlattr *tb[TCA_RSVP_MAX + 1];
-	struct tcf_exts e;
-	unsigned int h1, h2;
-	__be32 *dst;
-	int err;
-
-	if (opt == NULL)
-		return handle ? -EINVAL : 0;
-
-	err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy,
-					  NULL);
-	if (err < 0)
-		return err;
-
-	err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
-	if (err < 0)
-		return err;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
-				extack);
-	if (err < 0)
-		goto errout2;
-
-	f = *arg;
-	if (f) {
-		/* Node exists: adjust only classid */
-		struct rsvp_filter *n;
-
-		if (f->handle != handle && handle)
-			goto errout2;
-
-		n = kmemdup(f, sizeof(*f), GFP_KERNEL);
-		if (!n) {
-			err = -ENOMEM;
-			goto errout2;
-		}
-
-		err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
-				    TCA_RSVP_POLICE);
-		if (err < 0) {
-			kfree(n);
-			goto errout2;
-		}
-
-		if (tb[TCA_RSVP_CLASSID]) {
-			n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
-			tcf_bind_filter(tp, &n->res, base);
-		}
-
-		tcf_exts_change(&n->exts, &e);
-		rsvp_replace(tp, n, handle);
-		return 0;
-	}
-
-	/* Now more serious part... */
-	err = -EINVAL;
-	if (handle)
-		goto errout2;
-	if (tb[TCA_RSVP_DST] == NULL)
-		goto errout2;
-
-	err = -ENOBUFS;
-	f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
-	if (f == NULL)
-		goto errout2;
-
-	err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
-	if (err < 0)
-		goto errout;
-	h2 = 16;
-	if (tb[TCA_RSVP_SRC]) {
-		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
-		h2 = hash_src(f->src);
-	}
-	if (tb[TCA_RSVP_PINFO]) {
-		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
-		f->spi = pinfo->spi;
-		f->tunnelhdr = pinfo->tunnelhdr;
-	}
-	if (tb[TCA_RSVP_CLASSID])
-		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
-
-	dst = nla_data(tb[TCA_RSVP_DST]);
-	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
-
-	err = -ENOMEM;
-	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
-		goto errout;
-
-	if (f->tunnelhdr) {
-		err = -EINVAL;
-		if (f->res.classid > 255)
-			goto errout;
-
-		err = -ENOMEM;
-		if (f->res.classid == 0 &&
-		    (f->res.classid = gen_tunnel(data)) == 0)
-			goto errout;
-	}
-
-	for (sp = &data->ht[h1];
-	     (s = rtnl_dereference(*sp)) != NULL;
-	     sp = &s->next) {
-		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
-		    pinfo && pinfo->protocol == s->protocol &&
-		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
-#if RSVP_DST_LEN == 4
-		    dst[0] == s->dst[0] &&
-		    dst[1] == s->dst[1] &&
-		    dst[2] == s->dst[2] &&
-#endif
-		    pinfo->tunnelid == s->tunnelid) {
-
-insert:
-			/* OK, we found appropriate session */
-
-			fp = &s->ht[h2];
-
-			f->sess = s;
-			if (f->tunnelhdr == 0)
-				tcf_bind_filter(tp, &f->res, base);
-
-			tcf_exts_change(&f->exts, &e);
-
-			fp = &s->ht[h2];
-			for (nfp = rtnl_dereference(*fp); nfp;
-			     fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
-				__u32 mask = nfp->spi.mask & f->spi.mask;
-
-				if (mask != f->spi.mask)
-					break;
-			}
-			RCU_INIT_POINTER(f->next, nfp);
-			rcu_assign_pointer(*fp, f);
-
-			*arg = f;
-			return 0;
-		}
-	}
-
-	/* No session found. Create new one. */
-
-	err = -ENOBUFS;
-	s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
-	if (s == NULL)
-		goto errout;
-	memcpy(s->dst, dst, sizeof(s->dst));
-
-	if (pinfo) {
-		s->dpi = pinfo->dpi;
-		s->protocol = pinfo->protocol;
-		s->tunnelid = pinfo->tunnelid;
-	}
-	sp = &data->ht[h1];
-	for (nsp = rtnl_dereference(*sp); nsp;
-	     sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
-		if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
-			break;
-	}
-	RCU_INIT_POINTER(s->next, nsp);
-	rcu_assign_pointer(*sp, s);
-
-	goto insert;
-
-errout:
-	tcf_exts_destroy(&f->exts);
-	kfree(f);
-errout2:
-	tcf_exts_destroy(&e);
-	return err;
-}
-
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
-		      bool rtnl_held)
-{
-	struct rsvp_head *head = rtnl_dereference(tp->root);
-	unsigned int h, h1;
-
-	if (arg->stop)
-		return;
-
-	for (h = 0; h < 256; h++) {
-		struct rsvp_session *s;
-
-		for (s = rtnl_dereference(head->ht[h]); s;
-		     s = rtnl_dereference(s->next)) {
-			for (h1 = 0; h1 <= 16; h1++) {
-				struct rsvp_filter *f;
-
-				for (f = rtnl_dereference(s->ht[h1]); f;
-				     f = rtnl_dereference(f->next)) {
-					if (!tc_cls_stats_dump(tp, arg, f))
-						return;
-				}
-			}
-		}
-	}
-}
-
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
-{
-	struct rsvp_filter *f = fh;
-	struct rsvp_session *s;
-	struct nlattr *nest;
-	struct tc_rsvp_pinfo pinfo;
-
-	if (f == NULL)
-		return skb->len;
-	s = f->sess;
-
-	t->tcm_handle = f->handle;
-
-	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
-	if (nest == NULL)
-		goto nla_put_failure;
-
-	if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
-		goto nla_put_failure;
-	pinfo.dpi = s->dpi;
-	pinfo.spi = f->spi;
-	pinfo.protocol = s->protocol;
-	pinfo.tunnelid = s->tunnelid;
-	pinfo.tunnelhdr = f->tunnelhdr;
-	pinfo.pad = 0;
-	if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
-		goto nla_put_failure;
-	if (f->res.classid &&
-	    nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
-		goto nla_put_failure;
-	if (((f->handle >> 8) & 0xFF) != 16 &&
-	    nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
-		goto nla_put_failure;
-
-	if (tcf_exts_dump(skb, &f->exts) < 0)
-		goto nla_put_failure;
-
-	nla_nest_end(skb, nest);
-
-	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
-		goto nla_put_failure;
-	return skb->len;
-
-nla_put_failure:
-	nla_nest_cancel(skb, nest);
-	return -1;
-}
-
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
-			    unsigned long base)
-{
-	struct rsvp_filter *f = fh;
-
-	tc_cls_bind_class(classid, cl, q, &f->res, base);
-}
-
-static struct tcf_proto_ops RSVP_OPS __read_mostly = {
-	.kind		=	RSVP_ID,
-	.classify	=	RSVP_CLS,
-	.init		=	rsvp_init,
-	.destroy	=	rsvp_destroy,
-	.get		=	rsvp_get,
-	.change		=	rsvp_change,
-	.delete		=	rsvp_delete,
-	.walk		=	rsvp_walk,
-	.dump		=	rsvp_dump,
-	.bind_class	=	rsvp_bind_class,
-	.owner		=	THIS_MODULE,
-};
-
-static int __init init_rsvp(void)
-{
-	return register_tcf_proto_ops(&RSVP_OPS);
-}
-
-static void __exit exit_rsvp(void)
-{
-	unregister_tcf_proto_ops(&RSVP_OPS);
-}
-
-module_init(init_rsvp)
-module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
deleted file mode 100644
index e627cc32d633..000000000000
--- a/net/sched/cls_rsvp6.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp6.c	Special RSVP packet classifier for IPv6.
- *
- * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ipv6.h>
-#include <linux/skbuff.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/netlink.h>
-#include <net/tc_wrapper.h>
-
-#define RSVP_DST_LEN	4
-#define RSVP_ID		"rsvp6"
-#define RSVP_OPS	cls_rsvp6_ops
-#define RSVP_CLS rsvp6_classify
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json b/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json
deleted file mode 100644
index bdcbaa4c5663..000000000000
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json
+++ /dev/null
@@ -1,203 +0,0 @@
-[
-    {
-        "id": "2141",
-        "name": "Add rsvp filter with tcp proto and specific IP address",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto tcp session 198.168.10.64",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*session 198.168.10.64 ipproto tcp",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "5267",
-        "name": "Add rsvp filter with udp proto and specific IP address",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*session 1.1.1.1 ipproto udp",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "2819",
-        "name": "Add rsvp filter with src ip and src port",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 sender 2.2.2.2/5021 classid 1:1",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp sender  2.2.2.2/5021",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "c967",
-        "name": "Add rsvp filter with tunnelid and continue action",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnelid 2 classid 1:1 action continue",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp tunnelid 2.*action order [0-9]+: gact action continue",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "5463",
-        "name": "Add rsvp filter with tunnel and pipe action",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnel 2 skip 1 action pipe",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*tunnel 2 skip 1 session 1.1.1.1 ipproto udp.*action order [0-9]+: gact action pipe",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "2332",
-        "name": "Add rsvp filter with miltiple actions",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 7 rsvp ipproto udp session 1.1.1.1 classid 1:1 action skbedit mark 7 pipe action gact drop",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp.*action order [0-9]+: skbedit  mark 7 pipe.*action order [0-9]+: gact action drop",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "8879",
-        "name": "Add rsvp filter with tunnel and skp flag",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress"
-        ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnel 2 skip 1 action pipe",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*tunnel 2 skip 1 session 1.1.1.1 ipproto udp.*action order [0-9]+: gact action pipe",
-        "matchCount": "1",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "8261",
-        "name": "List rsvp filters",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress",
-            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 classid 1:1",
-            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto tcp session 2.2.2.2/1234 classid 2:1"
-        ],
-        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh",
-        "matchCount": "2",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    },
-    {
-        "id": "8989",
-        "name": "Delete rsvp filter",
-        "category": [
-            "filter",
-            "rsvp"
-        ],
-        "plugins": {
-            "requires": "nsPlugin"
-        },
-        "setup": [
-            "$TC qdisc add dev $DEV1 ingress",
-            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 tunnelid 9 classid 2:1"
-        ],
-        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 tunnelid 9 classid 2:1",
-        "expExitCode": "0",
-        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
-        "matchPattern": "filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 2:1 session 1.1.1.1/1234 ipproto udp tunnelid 9",
-        "matchCount": "0",
-        "teardown": [
-            "$TC qdisc del dev $DEV1 ingress"
-        ]
-    }
-]
-- 
cgit v1.2.3


From 5198cb408fcfc0b49c418314570423efe2217754 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Wed, 15 Feb 2023 14:46:59 +0100
Subject: selftests: seg6: add selftest for PSP flavor in SRv6 End behavior

This selftest is designed for testing the PSP flavor in SRv6 End behavior.
It instantiates a virtual network composed of several nodes: hosts and
SRv6 routers. Each node is realized using a network namespace that is
properly interconnected to others through veth pairs.
The test makes use of the SRv6 End behavior and of the PSP flavor needed
for removing the SRH from the IPv6 header at the penultimate node.

The correct execution of the behavior is verified through reachability
tests carried out between hosts.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/Makefile               |   1 +
 .../testing/selftests/net/srv6_end_flavors_test.sh | 869 +++++++++++++++++++++
 2 files changed, 870 insertions(+)
 create mode 100755 tools/testing/selftests/net/srv6_end_flavors_test.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3364c548a23b..6cd8993454d7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -38,6 +38,7 @@ TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
 TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
 TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
+TEST_PROGS += srv6_end_flavors_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
 TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS += ndisc_unsolicited_na_test.sh
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
new file mode 100755
index 000000000000..50563443a4ad
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -0,0 +1,869 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+#
+# This script is designed to test the support for "flavors" in the SRv6 End
+# behavior.
+#
+# Flavors defined in RFC8986 [1] represent additional operations that can modify
+# or extend the existing SRv6 End, End.X and End.T behaviors. For the sake of
+# convenience, we report the list of flavors described in [1] hereafter:
+#   - Penultimate Segment Pop (PSP);
+#   - Ultimate Segment Pop (USP);
+#   - Ultimate Segment Decapsulation (USD).
+#
+# The End, End.X, and End.T behaviors can support these flavors either
+# individually or in combinations.
+# Currently in this selftest we consider only the PSP flavor for the SRv6 End
+# behavior. However, it is possible to extend the script as soon as other
+# flavors will be supported in the kernel.
+#
+# The purpose of the PSP flavor consists in instructing the penultimate node
+# listed in the SRv6 policy to remove (i.e. pop) the outermost SRH from the IPv6
+# header.
+# A PSP enabled SRv6 End behavior instance processes the SRH by:
+#  - decrementing the Segment Left (SL) value from 1 to 0;
+#  - copying the last SID from the SID List into the IPv6 Destination Address
+#    (DA);
+#  - removing the SRH from the extension headers following the IPv6 header.
+#
+# Once the SRH is removed, the IPv6 packet is forwarded to the destination using
+# the IPv6 DA updated during the PSP operation (i.e. the IPv6 DA corresponding
+# to the last SID carried by the removed SRH).
+#
+# Although the PSP flavor can be set for any SRv6 End behavior instance on any
+# SR node, it will be active only on such behaviors bound to a penultimate SID
+# for a given SRv6 policy.
+#                                                SL=2 SL=1 SL=0
+#                                                  |    |    |
+# For example, given the SRv6 policy (SID List := <X,   Y,   Z>):
+#  - a PSP enabled SRv6 End behavior bound to SID Y will apply the PSP operation
+#    as Segment Left (SL) is 1, corresponding to the Penultimate Segment of the
+#    SID List;
+#  - a PSP enabled SRv6 End behavior bound to SID X will *NOT* apply the PSP
+#    operation as the Segment Left is 2. This behavior instance will apply the
+#    "standard" End packet processing, ignoring the configured PSP flavor at
+#    all.
+#
+# [1] RFC8986: https://datatracker.ietf.org/doc/html/rfc8986
+#
+# Network topology
+# ================
+#
+# The network topology used in this selftest is depicted hereafter, composed by
+# two hosts (hs-1, hs-2) and four routers (rt-1, rt-2, rt-3, rt-4).
+# Hosts hs-1 and hs-2 are connected to routers rt-1 and rt-2, respectively,
+# allowing them to communicate with each other.
+# Traffic exchanged between hs-1 and hs-2 can follow different network paths.
+# The network operator, through specific SRv6 Policies can steer traffic to one
+# path rather than another. In this selftest this is implemented as follows:
+#
+#   i) The SRv6 H.Insert behavior applies SRv6 Policies on traffic received by
+#      connected hosts. It pushes the Segment Routing Header (SRH) after the
+#      IPv6 header. The SRH contains the SID List (i.e. SRv6 Policy) needed for
+#      steering traffic across the segments/waypoints specified in that list;
+#
+#  ii) The SRv6 End behavior advances the active SID in the SID List carried by
+#      the SRH;
+#
+# iii) The PSP enabled SRv6 End behavior is used to remove the SRH when such
+#      behavior is configured on a node bound to the Penultimate Segment carried
+#      by the SID List.
+#
+#                cafe::1                      cafe::2
+#              +--------+                   +--------+
+#              |        |                   |        |
+#              |  hs-1  |                   |  hs-2  |
+#              |        |                   |        |
+#              +---+----+                   +--- +---+
+#     cafe::/64    |                             |      cafe::/64
+#                  |                             |
+#              +---+----+                   +----+---+
+#              |        |  fcf0:0:1:2::/64  |        |
+#              |  rt-1  +-------------------+  rt-2  |
+#              |        |                   |        |
+#              +---+----+                   +----+---+
+#                  |      .               .      |
+#                  |  fcf0:0:1:3::/64   .        |
+#                  |          .       .          |
+#                  |            .   .            |
+#  fcf0:0:1:4::/64 |              .              | fcf0:0:2:3::/64
+#                  |            .   .            |
+#                  |          .       .          |
+#                  |  fcf0:0:2:4::/64   .        |
+#                  |      .               .      |
+#              +---+----+                   +----+---+
+#              |        |                   |        |
+#              |  rt-4  +-------------------+  rt-3  |
+#              |        |  fcf0:0:3:4::/64  |        |
+#              +---+----+                   +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the IPv6 operator network.
+#
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+#   Local SID table for SRv6 router rt-x
+#   +---------------------------------------------------------------------+
+#   |fcff:x::e is associated with the SRv6 End behavior                   |
+#   |fcff:x::ef1 is associated with the SRv6 End behavior with PSP flavor |
+#   +---------------------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for the SIDs. Reachability of
+# SIDs is ensured by proper configuration of the IPv6 operator's network and
+# SRv6 routers.
+#
+#
+# SRv6 Policies
+# =============
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from connected hosts on the basis of the destination addresses.
+# In case of SRv6 H.Insert behavior, the SRv6 Policy enforcement consists of
+# pushing the SRH (carrying a given SID List) after the existing IPv6 header.
+# Note that in the inserting mode, there is no encapsulation at all.
+#
+#   Before applying an SRv6 Policy using the SRv6 H.Insert behavior
+#   +------+---------+
+#   | IPv6 | Payload |
+#   +------+---------+
+#
+#   After applying an SRv6 Policy using the SRv6 H.Insert behavior
+#   +------+-----+---------+
+#   | IPv6 | SRH | Payload |
+#   +------+-----+---------+
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policy:
+#
+#   i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::ef1,fcff:2::ef1,cafe::2
+#
+# Router rt-1 is configured to enforce the Policy (i.a) through the SRv6
+# H.Insert behavior which pushes the SRH after the existing IPv6 header. This
+# Policy steers the traffic from hs-1 across rt-3, rt-4, rt-2 and finally to the
+# destination hs-2.
+#
+# As the packet reaches the router rt-3, the SRv6 End behavior bound to SID
+# fcff:3::e is triggered. The behavior updates the Segment Left (from SL=3 to
+# SL=2) in the SRH, the IPv6 DA with fcff:4::ef1 and forwards the packet to the
+# next router on the path, i.e. rt-4.
+#
+# When router rt-4 receives the packet, the PSP enabled SRv6 End behavior bound
+# to SID fcff:4::ef1 is executed. Since the SL=2, the PSP operation is *NOT*
+# kicked in and the behavior applies the default End processing: the Segment
+# Left is decreased (from SL=2 to SL=1), the IPv6 DA is updated with the SID
+# fcff:2::ef1 and the packet is forwarded to router rt-2.
+#
+# The PSP enabled SRv6 End behavior on rt-2 is associated with SID fcff:2::ef1
+# and is executed as the packet is received. Because SL=1, the behavior applies
+# the PSP processing on the packet as follows: i) SL is decreased, i.e. from
+# SL=1 to SL=0; ii) last SID (cafe::2) is copied into the IPv6 DA; iii) the
+# outermost SRH is removed from the extension headers following the IPv6 header.
+# Once the PSP processing is completed, the packet is forwarded to the host hs-2
+# (destination).
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed to hs-1 are handled by rt-2 which
+# applies the following SRv6 Policy:
+#
+#   i.b) IPv6 traffic, SID List=fcff:1::ef1,cafe::1
+#
+# Router rt-2 is configured to enforce the Policy (i.b) through the SRv6
+# H.Insert behavior which pushes the SRH after the existing IPv6 header. This
+# Policy steers the traffic from hs-2 across rt-1 and finally to the
+# destination hs-1
+#
+#
+# When the router rt-1 receives the packet, the PSP enabled SRv6 End behavior
+# associated with the SID fcff:1::ef1 is triggered. Since the SL=1,
+# the PSP operation takes place: i) the SL is decremented; ii) the IPv6 DA is
+# set with the last SID; iii) the SRH is removed from the extension headers
+# after the IPv6 header. At this point, the packet with IPv6 DA=cafe::1 is sent
+# to the destination, i.e. hs-1.
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly RT2HS_DEVNAME="veth1"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv6_TESTS_ADDR=2001:db8::1
+readonly LOCATOR_SERVICE=fcff
+readonly END_FUNC=000e
+readonly END_PSP_FUNC=0ef1
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n"   "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+	if [ "${ret}" -ne 1 ]; then
+		ret=0
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+	local cmd="$1"
+
+	if [ ! -x "$(command -v "${cmd}")" ]; then
+		echo "SKIP: Could not run test without \"${cmd}\" tool";
+		exit "${ksft_skip}"
+	fi
+}
+
+get_nodename()
+{
+	local name="$1"
+
+	echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+	local rtid="$1"
+
+	get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+	local hsid="$1"
+
+	get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+	local name="$1"
+
+	ip netns add "${name}"
+}
+
+create_router()
+{
+	local rtid="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rtid}")"
+
+	__create_namespace "${nsname}"
+}
+
+create_host()
+{
+	local hsid="$1"
+	local nsname
+
+	nsname="$(get_hsname "${hsid}")"
+
+	__create_namespace "${nsname}"
+}
+
+cleanup()
+{
+	local nsname
+	local i
+
+	# destroy routers
+	for i in ${ROUTERS}; do
+		nsname="$(get_rtname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# destroy hosts
+	for i in ${HOSTS}; do
+		nsname="$(get_hsname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# check whether the setup phase was completed successfully or not. In
+	# case of an error during the setup phase of the testing environment,
+	# the selftest is considered as "skipped".
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Given the description of a router <id:op> as an input, the function returns
+# the <id> token which represents the ID of the router.
+# i.e. input: "12:psp"
+#      output: "12"
+__get_srv6_rtcfg_id()
+{
+	local element="$1"
+
+	echo "${element}" | cut -d':' -f1
+}
+
+# Given the description of a router <id:op> as an input, the function returns
+# the <op> token which represents the operation (e.g. End behavior with or
+# withouth flavors) configured for the node.
+
+# Note that when the operation represents an End behavior with a list of
+# flavors, the output is the ordered version of that list.
+# i.e. input: "5:usp,psp,usd"
+#      output: "psp,usd,usp"
+__get_srv6_rtcfg_op()
+{
+	local element="$1"
+
+	# return the lexicographically ordered flavors
+	echo "${element}" | cut -d':' -f2 | sed 's/,/\n/g' | sort | \
+		xargs | sed 's/ /,/g'
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+	ip -netns "${nsname}" link set lo up
+
+	ip -netns "${nsname}" link add ${DUMMY_DEVNAME} type dummy
+	ip -netns "${nsname}" link set ${DUMMY_DEVNAME} up
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	# Local End behavior (note that "dev" is a dummy interface chosen for
+	# the sake of simplicity).
+	ip -netns "${nsname}" -6 route \
+		add "${LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End dev "${DUMMY_DEVNAME}"
+
+
+	# all SIDs start with a common locator. Routes and SRv6 Endpoint
+	# behavior instaces are grouped together in the 'localsid' table.
+	ip -netns "${nsname}" -6 rule \
+		add to "${LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+
+	# set default routes to unreachable
+	ip -netns "${nsname}" -6 route \
+		add unreachable default metric 4278198272 \
+		dev "${DUMMY_DEVNAME}"
+}
+
+# This helper function builds and installs the SID List (i.e. SRv6 Policy)
+# to be applied on incoming packets at the ingress node. Moreover, it
+# configures the SRv6 nodes specified in the SID List to process the traffic
+# according to the operations required by the Policy itself.
+# args:
+#  $1 - destination host (i.e. cafe::x host)
+#  $2 - SRv6 router configured for enforcing the SRv6 Policy
+#  $3 - compact way to represent a list of SRv6 routers with their operations
+#       (i.e. behaviors) that each of them needs to perform. Every <nodeid:op>
+#       element constructs a SID that is associated with the behavior <op> on
+#       the <nodeid> node. The list of such elements forms an SRv6 Policy.
+__setup_rt_policy()
+{
+	local dst="$1"
+	local encap_rt="$2"
+	local policy_rts="$3"
+	local behavior_cfg
+	local in_nsname
+	local rt_nsname
+	local policy=''
+	local function
+	local fullsid
+	local op_type
+	local node
+	local n
+
+	in_nsname="$(get_rtname "${encap_rt}")"
+
+	for n in ${policy_rts}; do
+		node="$(__get_srv6_rtcfg_id "${n}")"
+		op_type="$(__get_srv6_rtcfg_op "${n}")"
+		rt_nsname="$(get_rtname "${node}")"
+
+		case "${op_type}" in
+		"noflv")
+			policy="${policy}${LOCATOR_SERVICE}:${node}::${END_FUNC},"
+			function="${END_FUNC}"
+			behavior_cfg="End"
+			;;
+
+		"psp")
+			policy="${policy}${LOCATOR_SERVICE}:${node}::${END_PSP_FUNC},"
+			function="${END_PSP_FUNC}"
+			behavior_cfg="End flavors psp"
+			;;
+
+		*)
+			break
+			;;
+		esac
+
+		fullsid="${LOCATOR_SERVICE}:${node}::${function}"
+
+		# add SRv6 Endpoint behavior to the selected router
+		if ! ip -netns "${rt_nsname}" -6 route get "${fullsid}" \
+			&>/dev/null; then
+			ip -netns "${rt_nsname}" -6 route \
+				add "${fullsid}" \
+				table "${LOCALSID_TABLE_ID}" \
+				encap seg6local action ${behavior_cfg} \
+				dev "${DUMMY_DEVNAME}"
+		fi
+	done
+
+	# we need to remove the trailing comma to avoid inserting an empty
+	# address (::0) in the SID List.
+	policy="${policy%,}"
+
+	# add SRv6 policy to incoming traffic sent by connected hosts
+	ip -netns "${in_nsname}" -6 route \
+		add "${IPv6_HS_NETWORK}::${dst}" \
+		encap seg6 mode inline segs "${policy}" \
+		dev "${DUMMY_DEVNAME}"
+
+	ip -netns "${in_nsname}" -6 neigh \
+		add proxy "${IPv6_HS_NETWORK}::${dst}" \
+		dev "${RT2HS_DEVNAME}"
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+	__setup_rt_policy "$1" "$2" "$3"
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add veth0 type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr \
+		add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+
+	ip -netns "${hsname}" link set veth0 up
+	ip -netns "${hsname}" link set lo up
+
+	ip -netns "${rtname}" addr \
+		add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+}
+
+setup()
+{
+	local i
+
+	# create routers
+	ROUTERS="1 2 3 4"; readonly ROUTERS
+	for i in ${ROUTERS}; do
+		create_router "${i}"
+	done
+
+	# create hosts
+	HOSTS="1 2"; readonly HOSTS
+	for i in ${HOSTS}; do
+		create_host "${i}"
+	done
+
+	# set up the links for connecting routers
+	add_link_rt_pairs 1 "2 3 4"
+	add_link_rt_pairs 2 "3 4"
+	add_link_rt_pairs 3 "4"
+
+	# set up the basic connectivity of routers and routes required for
+	# reachability of SIDs.
+	setup_rt_networking 1 "2 3 4"
+	setup_rt_networking 2 "1 3 4"
+	setup_rt_networking 3 "1 2 4"
+	setup_rt_networking 4 "1 2 3"
+
+	# set up the hosts connected to routers
+	setup_hs 1 1
+	setup_hs 2 2
+
+	# set up default SRv6 Endpoints (i.e. SRv6 End behavior)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# set up SRv6 policies
+	# create a connection between hosts hs-1 and hs-2.
+	# The path between hs-1 and hs-2 traverses SRv6 aware routers.
+	# For each direction two path are chosen:
+	#
+	# Direction hs-1 -> hs-2 (PSP flavor)
+	#  - rt-1 (SRv6 H.Insert policy)
+	#  - rt-3 (SRv6 End behavior)
+	#  - rt-4 (SRv6 End flavor PSP with SL>1, acting as End behavior)
+	#  - rt-2 (SRv6 End flavor PSP with SL=1)
+	#
+	# Direction hs-2 -> hs-1 (PSP flavor)
+	#  - rt-2 (SRv6 H.Insert policy)
+	#  - rt-1 (SRv6 End flavor PSP with SL=1)
+	setup_rt_policy_ipv6 2 1 "3:noflv 4:psp 2:psp"
+	setup_rt_policy_ipv6 1 2 "1:psp"
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_srv6_end_flv_psp_tests()
+{
+	log_section "SRv6 connectivity test hosts (h1 <-> h2, PSP flavor)"
+
+	check_and_log_hs_connectivity 1 2
+	check_and_log_hs_connectivity 2 1
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+	local flavor="$1"
+
+	if ! ip route help 2>&1 | grep -qo "${flavor}"; then
+		echo "SKIP: Missing SRv6 ${flavor} flavor support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+test_kernel_supp_or_ksft_skip()
+{
+	local flavor="$1"
+	local test_netns
+
+	test_netns="kflv-$(mktemp -u XXXXXXXX)"
+
+	if ! ip netns add "${test_netns}"; then
+		echo "SKIP: Cannot set up netns to test kernel support for flavors"
+		exit "${ksft_skip}"
+	fi
+
+	if ! ip -netns "${test_netns}" link \
+		add "${DUMMY_DEVNAME}" type dummy; then
+		echo "SKIP: Cannot set up dummy dev to test kernel support for flavors"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	if ! ip -netns "${test_netns}" link \
+		set "${DUMMY_DEVNAME}" up; then
+		echo "SKIP: Cannot activate dummy dev to test kernel support for flavors"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	if ! ip -netns "${test_netns}" -6 route \
+		add "${IPv6_TESTS_ADDR}" encap seg6local \
+		action End flavors "${flavor}" dev "${DUMMY_DEVNAME}"; then
+		echo "SKIP: ${flavor} flavor not supported in kernel"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	ip netns del "${test_netns}"
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+	local test_netns
+
+	test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+	if ! ip netns add "${test_netns}"; then
+		echo "SKIP: Cannot set up netns for testing dummy dev support"
+		exit "${ksft_skip}"
+	fi
+
+	modprobe dummy &>/dev/null || true
+	if ! ip -netns "${test_netns}" link \
+		add "${DUMMY_DEVNAME}" type dummy; then
+		echo "SKIP: dummy dev not supported"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	ip netns del "${test_netns}"
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+test_command_or_ksft_skip sed
+test_command_or_ksft_skip sort
+test_command_or_ksft_skip xargs
+
+test_dummy_dev_or_ksft_skip
+test_iproute2_supp_or_ksft_skip psp
+test_kernel_supp_or_ksft_skip psp
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_srv6_end_flv_psp_tests
+
+print_log_test_results
-- 
cgit v1.2.3


From c5a237a4db21ca7a28518c994def39d7bd62a0d1 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Wed, 15 Feb 2023 00:12:18 +0100
Subject: selftests/bpf: Use bpf_{btf,link,map,prog}_get_info_by_fd()

Use the new type-safe wrappers around bpf_obj_get_info_by_fd().
Fix a prog/map mixup in prog_holds_map().

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230214231221.249277-6-iii@linux.ibm.com
---
 .../selftests/bpf/map_tests/map_in_map_batch_ops.c |  2 +-
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  |  8 ++++----
 .../testing/selftests/bpf/prog_tests/bpf_obj_id.c  | 20 +++++++++---------
 tools/testing/selftests/bpf/prog_tests/btf.c       | 24 +++++++++++-----------
 .../selftests/bpf/prog_tests/btf_map_in_map.c      |  2 +-
 tools/testing/selftests/bpf/prog_tests/check_mtu.c |  2 +-
 .../selftests/bpf/prog_tests/enable_stats.c        |  2 +-
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c       | 14 ++++++-------
 .../bpf/prog_tests/flow_dissector_reattach.c       | 10 ++++-----
 .../bpf/prog_tests/libbpf_get_fd_by_id_opts.c      |  4 ++--
 .../testing/selftests/bpf/prog_tests/lsm_cgroup.c  |  3 ++-
 tools/testing/selftests/bpf/prog_tests/metadata.c  |  8 ++++----
 tools/testing/selftests/bpf/prog_tests/mmap.c      |  2 +-
 tools/testing/selftests/bpf/prog_tests/perf_link.c |  2 +-
 tools/testing/selftests/bpf/prog_tests/pinning.c   |  2 +-
 .../selftests/bpf/prog_tests/prog_run_opts.c       |  2 +-
 tools/testing/selftests/bpf/prog_tests/recursion.c |  4 ++--
 .../selftests/bpf/prog_tests/sockmap_basic.c       |  6 +++---
 .../selftests/bpf/prog_tests/task_local_storage.c  |  8 ++++----
 tools/testing/selftests/bpf/prog_tests/tc_bpf.c    |  4 ++--
 .../selftests/bpf/prog_tests/tp_attach_query.c     |  5 +++--
 .../selftests/bpf/prog_tests/unpriv_bpf_disabled.c |  8 ++++----
 .../testing/selftests/bpf/prog_tests/verif_stats.c |  5 +++--
 .../testing/selftests/bpf/prog_tests/xdp_attach.c  |  4 ++--
 .../selftests/bpf/prog_tests/xdp_cpumap_attach.c   |  8 ++++----
 .../selftests/bpf/prog_tests/xdp_devmap_attach.c   |  8 ++++----
 tools/testing/selftests/bpf/prog_tests/xdp_info.c  |  2 +-
 tools/testing/selftests/bpf/prog_tests/xdp_link.c  | 10 +++++----
 tools/testing/selftests/bpf/test_maps.c            |  2 +-
 .../selftests/bpf/test_skb_cgroup_id_user.c        |  2 +-
 .../selftests/bpf/test_tcp_check_syncookie_user.c  |  2 +-
 tools/testing/selftests/bpf/test_verifier.c        |  8 ++++----
 tools/testing/selftests/bpf/testing_helpers.c      |  2 +-
 tools/testing/selftests/bpf/xdp_synproxy.c         | 15 ++++++++------
 34 files changed, 109 insertions(+), 101 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index f472d28ad11a..16f1671e4bde 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -18,7 +18,7 @@ static __u32 get_map_id_from_fd(int map_fd)
 	uint32_t info_len = sizeof(map_info);
 	int ret;
 
-	ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+	ret = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
 	CHECK(ret < 0, "Finding map info failed", "error:%s\n",
 	      strerror(errno));
 
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 3af6450763e9..1f02168103dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -195,8 +195,8 @@ static void check_bpf_link_info(const struct bpf_program *prog)
 		return;
 
 	info_len = sizeof(info);
-	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
-	ASSERT_OK(err, "bpf_obj_get_info_by_fd");
+	err = bpf_link_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+	ASSERT_OK(err, "bpf_link_get_info_by_fd");
 	ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid");
 
 	bpf_link__destroy(link);
@@ -684,13 +684,13 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 
 	/* setup filtering map_id in bpf program */
 	map_info_len = sizeof(map_info);
-	err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+	err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len);
 	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
 		  strerror(errno)))
 		goto free_map2;
 	skel->bss->map1_id = map_info.id;
 
-	err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+	err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len);
 	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
 		  strerror(errno)))
 		goto free_map2;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index e1c1e521cca2..675b90b15280 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -44,7 +44,7 @@ void serial_test_bpf_obj_id(void)
 	CHECK(err >= 0 || errno != ENOENT,
 	      "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
 
-	/* Check bpf_obj_get_info_by_fd() */
+	/* Check bpf_map_get_info_by_fd() */
 	bzero(zeros, sizeof(zeros));
 	for (i = 0; i < nr_iters; i++) {
 		now = time(NULL);
@@ -79,7 +79,7 @@ void serial_test_bpf_obj_id(void)
 		/* Check getting map info */
 		info_len = sizeof(struct bpf_map_info) * 2;
 		bzero(&map_infos[i], info_len);
-		err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+		err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i],
 					     &info_len);
 		if (CHECK(err ||
 			  map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
@@ -118,8 +118,8 @@ void serial_test_bpf_obj_id(void)
 		err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
 		if (CHECK_FAIL(err))
 			goto done;
-		err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
-					     &info_len);
+		err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i],
+					      &info_len);
 		load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
 			+ (prog_infos[i].load_time / nsec_per_sec);
 		if (CHECK(err ||
@@ -161,8 +161,8 @@ void serial_test_bpf_obj_id(void)
 		bzero(&link_infos[i], info_len);
 		link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name);
 		link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
-		err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
-					     &link_infos[i], &info_len);
+		err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]),
+					      &link_infos[i], &info_len);
 		if (CHECK(err ||
 			  link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
 			  link_infos[i].prog_id != prog_infos[i].id ||
@@ -217,7 +217,7 @@ void serial_test_bpf_obj_id(void)
 		 * prog_info.map_ids = NULL
 		 */
 		prog_info.nr_map_ids = 1;
-		err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+		err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
 		if (CHECK(!err || errno != EFAULT,
 			  "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
 			  err, errno, EFAULT))
@@ -228,7 +228,7 @@ void serial_test_bpf_obj_id(void)
 		saved_map_id = *(int *)((long)prog_infos[i].map_ids);
 		prog_info.map_ids = prog_infos[i].map_ids;
 		prog_info.nr_map_ids = 2;
-		err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+		err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
 		prog_infos[i].jited_prog_insns = 0;
 		prog_infos[i].xlated_prog_insns = 0;
 		CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
@@ -277,7 +277,7 @@ void serial_test_bpf_obj_id(void)
 		if (CHECK_FAIL(err))
 			goto done;
 
-		err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+		err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
 		CHECK(err || info_len != sizeof(struct bpf_map_info) ||
 		      memcmp(&map_info, &map_infos[i], info_len) ||
 		      array_value != array_magic_value,
@@ -322,7 +322,7 @@ void serial_test_bpf_obj_id(void)
 
 		nr_id_found++;
 
-		err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len);
+		err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len);
 		cmp_res = memcmp(&link_info, &link_infos[i],
 				offsetof(struct bpf_link_info, raw_tracepoint));
 		CHECK(err || info_len != sizeof(link_info) || cmp_res,
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index de1b5b9eb93a..cbb600be943d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4422,7 +4422,7 @@ static int test_big_btf_info(unsigned int test_num)
 	info->btf = ptr_to_u64(user_btf);
 	info->btf_size = raw_btf_size;
 
-	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len);
 	if (CHECK(!err, "!err")) {
 		err = -1;
 		goto done;
@@ -4435,7 +4435,7 @@ static int test_big_btf_info(unsigned int test_num)
 	 * to userspace.
 	 */
 	info_garbage.garbage = 0;
-	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len);
 	if (CHECK(err || info_len != sizeof(*info),
 		  "err:%d errno:%d info_len:%u sizeof(*info):%zu",
 		  err, errno, info_len, sizeof(*info))) {
@@ -4499,7 +4499,7 @@ static int test_btf_id(unsigned int test_num)
 
 	/* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
 	info_len = sizeof(info[0]);
-	err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+	err = bpf_btf_get_info_by_fd(btf_fd[0], &info[0], &info_len);
 	if (CHECK(err, "errno:%d", errno)) {
 		err = -1;
 		goto done;
@@ -4512,7 +4512,7 @@ static int test_btf_id(unsigned int test_num)
 	}
 
 	ret = 0;
-	err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+	err = bpf_btf_get_info_by_fd(btf_fd[1], &info[1], &info_len);
 	if (CHECK(err || info[0].id != info[1].id ||
 		  info[0].btf_size != info[1].btf_size ||
 		  (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
@@ -4535,7 +4535,7 @@ static int test_btf_id(unsigned int test_num)
 	}
 
 	info_len = sizeof(map_info);
-	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+	err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
 	if (CHECK(err || map_info.btf_id != info[0].id ||
 		  map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
 		  "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
@@ -4638,7 +4638,7 @@ static void do_test_get_info(unsigned int test_num)
 	info.btf_size = user_btf_size;
 
 	ret = 0;
-	err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+	err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len);
 	if (CHECK(err || !info.id || info_len != sizeof(info) ||
 		  info.btf_size != raw_btf_size ||
 		  (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
@@ -4755,7 +4755,7 @@ static void do_test_file(unsigned int test_num)
 
 	/* get necessary program info */
 	info_len = sizeof(struct bpf_prog_info);
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 
 	if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
@@ -4787,7 +4787,7 @@ static void do_test_file(unsigned int test_num)
 	info.func_info_rec_size = rec_size;
 	info.func_info = ptr_to_u64(func_info);
 
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 
 	if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
@@ -6405,7 +6405,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
 
 	/* get necessary lens */
 	info_len = sizeof(struct bpf_prog_info);
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
 		return -1;
@@ -6435,7 +6435,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
 	info.nr_func_info = nr_func_info;
 	info.func_info_rec_size = rec_size;
 	info.func_info = ptr_to_u64(func_info);
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
 		fprintf(stderr, "%s\n", btf_log_buf);
 		err = -1;
@@ -6499,7 +6499,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
 	nr_jited_func_lens = nr_jited_ksyms;
 
 	info_len = sizeof(struct bpf_prog_info);
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
 		err = -1;
 		goto done;
@@ -6573,7 +6573,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
 		info.jited_func_lens = ptr_to_u64(jited_func_lens);
 	}
 
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 
 	/*
 	 * Only recheck the info.*line_info* fields.
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index eb90a6b8850d..a8b53b8736f0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -14,7 +14,7 @@ static __u32 bpf_map_id(struct bpf_map *map)
 	int err;
 
 	memset(&info, 0, info_len);
-	err = bpf_obj_get_info_by_fd(bpf_map__fd(map), &info, &info_len);
+	err = bpf_map_get_info_by_fd(bpf_map__fd(map), &info, &info_len);
 	if (err)
 		return 0;
 	return info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 12f4395f18b3..5338d2ea0460 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -59,7 +59,7 @@ static void test_check_mtu_xdp_attach(void)
 
 	memset(&link_info, 0, sizeof(link_info));
 	fd = bpf_link__fd(link);
-	err = bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len);
+	err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
 	if (CHECK(err, "link_info", "failed: %d\n", err))
 		goto out;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
index 2cb2085917e7..75f85d0fe74a 100644
--- a/tools/testing/selftests/bpf/prog_tests/enable_stats.c
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -28,7 +28,7 @@ void test_enable_stats(void)
 
 	prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
 	memset(&info, 0, info_len);
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	if (CHECK(err, "get_prog_info",
 		  "failed to get bpf_prog_info for fd %d\n", prog_fd))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 20f5fa0fcec9..8ec73fdfcdab 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -79,7 +79,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 		return;
 
 	info_len = sizeof(prog_info);
-	err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len);
+	err = bpf_prog_get_info_by_fd(tgt_fd, &prog_info, &info_len);
 	if (!ASSERT_OK(err, "tgt_fd_get_info"))
 		goto close_prog;
 
@@ -136,8 +136,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 
 		info_len = sizeof(link_info);
 		memset(&link_info, 0, sizeof(link_info));
-		err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]),
-					     &link_info, &info_len);
+		err = bpf_link_get_info_by_fd(bpf_link__fd(link[i]),
+					      &link_info, &info_len);
 		ASSERT_OK(err, "link_fd_get_info");
 		ASSERT_EQ(link_info.tracing.attach_type,
 			  bpf_program__expected_attach_type(prog[i]),
@@ -417,7 +417,7 @@ static int find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 	struct btf *btf;
 	int ret;
 
-	ret = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
+	ret = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
 	if (ret)
 		return ret;
 
@@ -483,12 +483,12 @@ static void test_fentry_to_cgroup_bpf(void)
 	if (!ASSERT_GE(fentry_fd, 0, "load_fentry"))
 		goto cleanup;
 
-	/* Make sure bpf_obj_get_info_by_fd works correctly when attaching
+	/* Make sure bpf_prog_get_info_by_fd works correctly when attaching
 	 * to another BPF program.
 	 */
 
-	ASSERT_OK(bpf_obj_get_info_by_fd(fentry_fd, &info, &info_len),
-		  "bpf_obj_get_info_by_fd");
+	ASSERT_OK(bpf_prog_get_info_by_fd(fentry_fd, &info, &info_len),
+		  "bpf_prog_get_info_by_fd");
 
 	ASSERT_EQ(info.btf_id, 0, "info.btf_id");
 	ASSERT_EQ(info.attach_btf_id, btf_id, "info.attach_btf_id");
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 7c79462d2702..9333f7346d15 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -60,9 +60,9 @@ static __u32 query_prog_id(int prog)
 	__u32 info_len = sizeof(info);
 	int err;
 
-	err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog, &info, &info_len);
 	if (CHECK_FAIL(err || info_len != sizeof(info))) {
-		perror("bpf_obj_get_info_by_fd");
+		perror("bpf_prog_get_info_by_fd");
 		return 0;
 	}
 
@@ -497,7 +497,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
 	}
 
 	info_len = sizeof(info);
-	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	err = bpf_link_get_info_by_fd(link, &info, &info_len);
 	if (CHECK_FAIL(err)) {
 		perror("bpf_obj_get_info");
 		goto out_unlink;
@@ -521,7 +521,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
 
 	link_id = info.id;
 	info_len = sizeof(info);
-	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	err = bpf_link_get_info_by_fd(link, &info, &info_len);
 	if (CHECK_FAIL(err)) {
 		perror("bpf_obj_get_info");
 		goto out_unlink;
@@ -546,7 +546,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
 	netns = -1;
 
 	info_len = sizeof(info);
-	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	err = bpf_link_get_info_by_fd(link, &info, &info_len);
 	if (CHECK_FAIL(err)) {
 		perror("bpf_obj_get_info");
 		goto out_unlink;
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c
index 25e5dfa9c315..a3f238f51d05 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c
@@ -29,9 +29,9 @@ void test_libbpf_get_fd_by_id_opts(void)
 	if (!ASSERT_OK(ret, "test_libbpf_get_fd_by_id_opts__attach"))
 		goto close_prog;
 
-	ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.data_input),
+	ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.data_input),
 				     &info_m, &len);
-	if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd"))
+	if (!ASSERT_OK(ret, "bpf_map_get_info_by_fd"))
 		goto close_prog;
 
 	fd = bpf_map_get_fd_by_id(info_m.id);
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
index f117bfef68a1..130a3b21e467 100644
--- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
@@ -47,7 +47,8 @@ static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func)
 
 		fd = bpf_prog_get_fd_by_id(p.prog_ids[i]);
 		ASSERT_GE(fd, 0, "prog_get_fd_by_id");
-		ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd");
+		ASSERT_OK(bpf_prog_get_info_by_fd(fd, &info, &info_len),
+			  "prog_info_by_fd");
 		close(fd);
 
 		if (info.attach_btf_id ==
diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c
index 2c53eade88e3..8b67dfc10f5c 100644
--- a/tools/testing/selftests/bpf/prog_tests/metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/metadata.c
@@ -16,7 +16,7 @@ static int duration;
 static int prog_holds_map(int prog_fd, int map_fd)
 {
 	struct bpf_prog_info prog_info = {};
-	struct bpf_prog_info map_info = {};
+	struct bpf_map_info map_info = {};
 	__u32 prog_info_len;
 	__u32 map_info_len;
 	__u32 *map_ids;
@@ -25,12 +25,12 @@ static int prog_holds_map(int prog_fd, int map_fd)
 	int i;
 
 	map_info_len = sizeof(map_info);
-	ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+	ret = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
 	if (ret)
 		return -errno;
 
 	prog_info_len = sizeof(prog_info);
-	ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+	ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
 	if (ret)
 		return -errno;
 
@@ -44,7 +44,7 @@ static int prog_holds_map(int prog_fd, int map_fd)
 	prog_info.map_ids = ptr_to_u64(map_ids);
 	prog_info_len = sizeof(prog_info);
 
-	ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+	ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
 	if (ret) {
 		ret = -errno;
 		goto free_map_ids;
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 37b002ca1167..a271d5a0f7ab 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -64,7 +64,7 @@ void test_mmap(void)
 
 	/* get map's ID */
 	memset(&map_info, 0, map_info_sz);
-	err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
+	err = bpf_map_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
 	if (CHECK(err, "map_get_info", "failed %d\n", errno))
 		goto cleanup;
 	data_map_id = map_info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
index 224eba6fef2e..3a25f1c743a1 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -54,7 +54,7 @@ void serial_test_perf_link(void)
 		goto cleanup;
 
 	memset(&info, 0, sizeof(info));
-	err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+	err = bpf_link_get_info_by_fd(link_fd, &info, &info_len);
 	if (!ASSERT_OK(err, "link_get_info"))
 		goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index d95cee5867b7..c799a3c5ad1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -18,7 +18,7 @@ __u32 get_map_id(struct bpf_object *obj, const char *name)
 	if (CHECK(!map, "find map", "NULL map"))
 		return 0;
 
-	err = bpf_obj_get_info_by_fd(bpf_map__fd(map),
+	err = bpf_map_get_info_by_fd(bpf_map__fd(map),
 				     &map_info, &map_info_len);
 	CHECK(err, "get map info", "err %d errno %d", err, errno);
 	return map_info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
index 1ccd2bdf8fa8..01f1d1b6715a 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
@@ -12,7 +12,7 @@ static void check_run_cnt(int prog_fd, __u64 run_cnt)
 	__u32 info_len = sizeof(info);
 	int err;
 
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
 		return;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c
index f3af2627b599..23552d3e3365 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursion.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursion.c
@@ -31,8 +31,8 @@ void test_recursion(void)
 	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
 	ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2");
 
-	err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_delete),
-				     &prog_info, &prog_info_len);
+	err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.on_delete),
+				      &prog_info, &prog_info_len);
 	if (!ASSERT_OK(err, "get_prog_info"))
 		goto out;
 	ASSERT_EQ(prog_info.recursion_misses, 2, "recursion_misses");
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 0aa088900699..0ce25a967481 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -299,9 +299,9 @@ static __u32 query_prog_id(int prog_fd)
 	__u32 info_len = sizeof(info);
 	int err;
 
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
-	    !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+	    !ASSERT_EQ(info_len, sizeof(info), "bpf_prog_get_info_by_fd"))
 		return 0;
 
 	return info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index a176bd75a748..ea8537c54413 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -119,19 +119,19 @@ static void test_recursion(void)
 
 	prog_fd = bpf_program__fd(skel->progs.on_lookup);
 	memset(&info, 0, sizeof(info));
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	ASSERT_OK(err, "get prog info");
 	ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion");
 
 	prog_fd = bpf_program__fd(skel->progs.on_update);
 	memset(&info, 0, sizeof(info));
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	ASSERT_OK(err, "get prog info");
 	ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");
 
 	prog_fd = bpf_program__fd(skel->progs.on_enter);
 	memset(&info, 0, sizeof(info));
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	ASSERT_OK(err, "get prog info");
 	ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion");
 
@@ -221,7 +221,7 @@ static void test_nodeadlock(void)
 
 	info_len = sizeof(info);
 	prog_fd = bpf_program__fd(skel->progs.socket_post_create);
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	ASSERT_OK(err, "get prog info");
 	ASSERT_EQ(info.recursion_misses, 0, "prog recursion");
 
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
index 4a505a5adf4d..e873766276d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -29,8 +29,8 @@ static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd)
 	__u32 info_len = sizeof(info);
 	int ret;
 
-	ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
-	if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd"))
+	ret = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+	if (!ASSERT_OK(ret, "bpf_prog_get_info_by_fd"))
 		return ret;
 
 	ret = bpf_tc_attach(hook, &opts);
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
index a479080533db..770fcc3bb1ba 100644
--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
+++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
@@ -45,8 +45,9 @@ void serial_test_tp_attach_query(void)
 		prog_info.xlated_prog_len = 0;
 		prog_info.nr_map_ids = 0;
 		info_len = sizeof(prog_info);
-		err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
-		if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+		err = bpf_prog_get_info_by_fd(prog_fd[i], &prog_info,
+					      &info_len);
+		if (CHECK(err, "bpf_prog_get_info_by_fd", "err %d errno %d\n",
 			  err, errno))
 			goto cleanup1;
 		saved_prog_ids[i] = prog_info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
index 1ed3cc2092db..8383a99f610f 100644
--- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
+++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
@@ -179,7 +179,7 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s
 	ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails");
 	ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails");
 
-	if (ASSERT_OK(bpf_obj_get_info_by_fd(map_fds[0], &map_info, &map_info_len),
+	if (ASSERT_OK(bpf_map_get_info_by_fd(map_fds[0], &map_info, &map_info_len),
 		      "obj_get_info_by_fd")) {
 		ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails");
 		ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM,
@@ -187,8 +187,8 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s
 	}
 	ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails");
 
-	if (ASSERT_OK(bpf_obj_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter),
-					     &link_info, &link_info_len),
+	if (ASSERT_OK(bpf_link_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter),
+					      &link_info, &link_info_len),
 		      "obj_get_info_by_fd")) {
 		ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails");
 		ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM,
@@ -269,7 +269,7 @@ void test_unpriv_bpf_disabled(void)
 	}
 
 	prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter);
-	ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len),
+	ASSERT_OK(bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len),
 		  "obj_get_info_by_fd");
 	prog_id = prog_info.id;
 	ASSERT_GT(prog_id, 0, "valid_prog_id");
diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
index a47e7c0e1ffd..af4b95f57ac1 100644
--- a/tools/testing/selftests/bpf/prog_tests/verif_stats.c
+++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
@@ -16,8 +16,9 @@ void test_verif_stats(void)
 	if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
 		goto cleanup;
 
-	err = bpf_obj_get_info_by_fd(skel->progs.sys_enter.prog_fd, &info, &len);
-	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+	err = bpf_prog_get_info_by_fd(skel->progs.sys_enter.prog_fd,
+				      &info, &len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
 		goto cleanup;
 
 	if (!ASSERT_GT(info.verified_insns, 0, "verified_insns"))
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 062fbc8c8e5e..d4cd9f873c14 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -18,7 +18,7 @@ void serial_test_xdp_attach(void)
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
 	if (CHECK_FAIL(err))
 		return;
-	err = bpf_obj_get_info_by_fd(fd1, &info, &len);
+	err = bpf_prog_get_info_by_fd(fd1, &info, &len);
 	if (CHECK_FAIL(err))
 		goto out_1;
 	id1 = info.id;
@@ -28,7 +28,7 @@ void serial_test_xdp_attach(void)
 		goto out_1;
 
 	memset(&info, 0, sizeof(info));
-	err = bpf_obj_get_info_by_fd(fd2, &info, &len);
+	err = bpf_prog_get_info_by_fd(fd2, &info, &len);
 	if (CHECK_FAIL(err))
 		goto out_2;
 	id2 = info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index f775a1613833..481626a875d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -33,8 +33,8 @@ static void test_xdp_with_cpumap_helpers(void)
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
 	map_fd = bpf_map__fd(skel->maps.cpu_map);
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
-	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
 		goto out_close;
 
 	val.bpf_prog.fd = prog_fd;
@@ -85,8 +85,8 @@ static void test_xdp_with_cpumap_frags_helpers(void)
 
 	frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
 	map_fd = bpf_map__fd(skel->maps.cpu_map);
-	err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len);
-	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+	err = bpf_prog_get_info_by_fd(frags_prog_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
 		goto out_close;
 
 	val.bpf_prog.fd = frags_prog_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index ead40016c324..ce6812558287 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -35,8 +35,8 @@ static void test_xdp_with_devmap_helpers(void)
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
 	map_fd = bpf_map__fd(skel->maps.dm_ports);
-	err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
-	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+	err = bpf_prog_get_info_by_fd(dm_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
 		goto out_close;
 
 	val.bpf_prog.fd = dm_fd;
@@ -98,8 +98,8 @@ static void test_xdp_with_devmap_frags_helpers(void)
 
 	dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
 	map_fd = bpf_map__fd(skel->maps.dm_ports);
-	err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len);
-	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+	err = bpf_prog_get_info_by_fd(dm_fd_frags, &info, &len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
 		goto out_close;
 
 	val.bpf_prog.fd = dm_fd_frags;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index 286c21ecdc65..1dbddcab87a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -34,7 +34,7 @@ void serial_test_xdp_info(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
 	if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
 		goto out_close;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 3e9d5c5521f0..e7e9f3c22edf 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -29,13 +29,13 @@ void serial_test_xdp_link(void)
 	prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
 
 	memset(&prog_info, 0, sizeof(prog_info));
-	err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
 	if (!ASSERT_OK(err, "fd_info1"))
 		goto cleanup;
 	id1 = prog_info.id;
 
 	memset(&prog_info, 0, sizeof(prog_info));
-	err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
 	if (!ASSERT_OK(err, "fd_info2"))
 		goto cleanup;
 	id2 = prog_info.id;
@@ -119,7 +119,8 @@ void serial_test_xdp_link(void)
 		goto cleanup;
 
 	memset(&link_info, 0, sizeof(link_info));
-	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	err = bpf_link_get_info_by_fd(bpf_link__fd(link),
+				      &link_info, &link_info_len);
 	if (!ASSERT_OK(err, "link_info"))
 		goto cleanup;
 
@@ -137,7 +138,8 @@ void serial_test_xdp_link(void)
 		goto cleanup;
 
 	memset(&link_info, 0, sizeof(link_info));
-	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	err = bpf_link_get_info_by_fd(bpf_link__fd(link),
+				      &link_info, &link_info_len);
 
 	ASSERT_OK(err, "link_info");
 	ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index b73152822aa2..7fc00e423e4d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1275,7 +1275,7 @@ static void test_map_in_map(void)
 			goto out_map_in_map;
 		}
 
-		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		err = bpf_map_get_info_by_fd(fd, &info, &len);
 		if (err) {
 			printf("Failed to get map info by fd %d: %d", fd,
 			       errno);
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 3256de30f563..ed518d075d1d 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -93,7 +93,7 @@ int get_map_fd_by_prog_id(int prog_id)
 	info.nr_map_ids = 1;
 	info.map_ids = (__u64) (unsigned long) map_ids;
 
-	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+	if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) {
 		log_err("Failed to get info by prog fd %d", prog_fd);
 		goto err;
 	}
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index 5c8ef062f760..32df93747095 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -96,7 +96,7 @@ static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
 	info.nr_map_ids = 1;
 	info.map_ids = (__u64)(unsigned long)map_ids;
 
-	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+	if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) {
 		log_err("Failed to get info by prog fd %d", prog_fd);
 		goto err;
 	}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 887c49dc5abd..8b9949bb833d 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1239,8 +1239,8 @@ static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
 	__u32 xlated_prog_len;
 	__u32 buf_element_size = sizeof(struct bpf_insn);
 
-	if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
-		perror("bpf_obj_get_info_by_fd failed");
+	if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("bpf_prog_get_info_by_fd failed");
 		return -1;
 	}
 
@@ -1261,8 +1261,8 @@ static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
 	bzero(&info, sizeof(info));
 	info.xlated_prog_len = xlated_prog_len;
 	info.xlated_prog_insns = (__u64)(unsigned long)*buf;
-	if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
-		perror("second bpf_obj_get_info_by_fd failed");
+	if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("second bpf_prog_get_info_by_fd failed");
 		goto out_free_buf;
 	}
 
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 9695318e8132..6c44153755e6 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -164,7 +164,7 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
 	int err;
 
 	memset(info, 0, sizeof(*info));
-	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len);
+	err = bpf_link_get_info_by_fd(bpf_link__fd(link), info, &info_len);
 	if (err) {
 		printf("failed to get link info: %d\n", -errno);
 		return 0;
diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
index 6dbe0b745198..ce68c342b56f 100644
--- a/tools/testing/selftests/bpf/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -217,9 +217,10 @@ static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
 
 	prog_fd = bpf_program__fd(prog);
 
-	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
 	if (err < 0) {
-		fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+		fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n",
+			strerror(-err));
 		goto out;
 	}
 	attached_tc = tc;
@@ -292,9 +293,10 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports
 	};
 	info_len = sizeof(prog_info);
 
-	err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+	err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
 	if (err != 0) {
-		fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+		fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n",
+			strerror(-err));
 		goto out;
 	}
 
@@ -317,9 +319,10 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports
 		map_fd = err;
 
 		info_len = sizeof(map_info);
-		err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+		err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
 		if (err != 0) {
-			fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err));
+			fprintf(stderr, "Error: bpf_map_get_info_by_fd: %s\n",
+				strerror(-err));
 			close(map_fd);
 			goto err_close_map_fds;
 		}
-- 
cgit v1.2.3


From df71a42cc37a44cdc7682f57aecf14ff44391eed Mon Sep 17 00:00:00 2001
From: Taichi Nishimura <awkrail01@gmail.com>
Date: Thu, 16 Feb 2023 17:55:37 +0900
Subject: Fix typos in selftest/bpf files

Run spell checker on files in selftest/bpf and fixed typos.

Signed-off-by: Taichi Nishimura <awkrail01@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/bpf/20230216085537.519062-1-awkrail01@gmail.com
---
 tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c    | 2 +-
 tools/testing/selftests/bpf/prog_tests/trampoline_count.c     | 2 +-
 tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c | 2 +-
 tools/testing/selftests/bpf/progs/dynptr_fail.c               | 2 +-
 tools/testing/selftests/bpf/progs/strobemeta.h                | 2 +-
 tools/testing/selftests/bpf/progs/test_cls_redirect.c         | 6 +++---
 tools/testing/selftests/bpf/progs/test_subprogs.c             | 2 +-
 tools/testing/selftests/bpf/progs/test_xdp_vlan.c             | 2 +-
 tools/testing/selftests/bpf/test_cpp.cpp                      | 2 +-
 tools/testing/selftests/bpf/veristat.c                        | 4 ++--
 10 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
index eb2feaac81fe..653b0a20fab9 100644
--- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -488,7 +488,7 @@ static void run_test(struct migrate_reuseport_test_case *test_case,
 			goto close_servers;
 	}
 
-	/* Tie requests to the first four listners */
+	/* Tie requests to the first four listeners */
 	err = start_clients(test_case);
 	if (!ASSERT_OK(err, "start_clients"))
 		goto close_clients;
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 8fd4c0d78089..e91d0d1769f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -79,7 +79,7 @@ void serial_test_trampoline_count(void)
 	if (!ASSERT_EQ(link, NULL, "ptr_is_null"))
 		goto cleanup;
 
-	/* and finaly execute the probe */
+	/* and finally execute the probe */
 	prog_fd = bpf_program__fd(prog);
 	if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 26fffb02ed10..ad21ee8c7e23 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -84,7 +84,7 @@ typedef void (*printf_fn_t)(const char *, ...);
  *	typedef int (*fn_t)(int);
  *	typedef char * const * (*fn_ptr2_t)(s_t, fn_t);
  *
- * - `fn_complext_t`: pointer to a function returning struct and accepting
+ * - `fn_complex_t`: pointer to a function returning struct and accepting
  *   union and struct. All structs and enum are anonymous and defined inline.
  *
  * - `signal_t: pointer to a function accepting a pointer to a function as an
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 5950ad6ec2e6..aa5b69354b91 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -630,7 +630,7 @@ static int release_twice_callback_fn(__u32 index, void *data)
 }
 
 /* Test that releasing a dynptr twice, where one of the releases happens
- * within a calback function, fails
+ * within a callback function, fails
  */
 SEC("?raw_tp")
 __failure __msg("arg 1 is an unacquired reference")
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 753718595c26..e562be6356f3 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -135,7 +135,7 @@ struct strobe_value_loc {
 	 * tpidr_el0 for aarch64).
 	 * TLS_IMM_EXEC: absolute address of GOT entry containing offset
 	 * from thread pointer;
-	 * TLS_GENERAL_DYN: absolute addres of double GOT entry
+	 * TLS_GENERAL_DYN: absolute address of double GOT entry
 	 * containing tls_index_t struct;
 	 */
 	int64_t offset;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 2833ad722cb7..66b304982245 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -600,7 +600,7 @@ static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
 		return TC_ACT_SHOT;
 	}
 
-	/* Skip the remainig next hops (may be zero). */
+	/* Skip the remaining next hops (may be zero). */
 	return skip_next_hops(pkt, encap->unigue.hop_count -
 					   encap->unigue.next_hop - 1);
 }
@@ -610,8 +610,8 @@ static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
  *
  *    fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
  *
- * clang will substitue a costant for sizeof, which allows the verifier
- * to track it's value. Based on this, it can figure out the constant
+ * clang will substitute a constant for sizeof, which allows the verifier
+ * to track its value. Based on this, it can figure out the constant
  * return value, and calling code works while still being "generic" to
  * IPv4 and IPv6.
  */
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index f8e9256cf18d..a8d602d7c88a 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -47,7 +47,7 @@ static __noinline int sub5(int v)
 	return sub1(v) - 1; /* compensates sub1()'s + 1 */
 }
 
-/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer
+/* unfortunately verifier rejects `struct task_struct *t` as an unknown pointer
  * type, so we need to accept pointer as integer and then cast it inside the
  * function
  */
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index cdf3c48d6cbb..4ddcb6dfe500 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -98,7 +98,7 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
 	return true;
 }
 
-/* Hint, VLANs are choosen to hit network-byte-order issues */
+/* Hint, VLANs are chosen to hit network-byte-order issues */
 #define TESTVLAN 4011 /* 0xFAB */
 // #define TO_VLAN  4000 /* 0xFA0 (hint 0xOA0 = 160) */
 
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index 0bd9990e83fa..f4936834f76f 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -91,7 +91,7 @@ static void try_skeleton_template()
 
 	skel.detach();
 
-	/* destructor will destory underlying skeleton */
+	/* destructor will destroy underlying skeleton */
 }
 
 int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index f961b49b8ef4..83231456d3c5 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -144,7 +144,7 @@ static struct env {
 	struct verif_stats *prog_stats;
 	int prog_stat_cnt;
 
-	/* baseline_stats is allocated and used only in comparsion mode */
+	/* baseline_stats is allocated and used only in comparison mode */
 	struct verif_stats *baseline_stats;
 	int baseline_stat_cnt;
 
@@ -882,7 +882,7 @@ static int process_obj(const char *filename)
 		 * that BPF object file is incomplete and has to be statically
 		 * linked into a final BPF object file; instead of bailing
 		 * out, report it into stderr, mark it as skipped, and
-		 * proceeed
+		 * proceed
 		 */
 		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
 		env.files_skipped++;
-- 
cgit v1.2.3


From 95ebb376176c52382293e05e63f142114a5e40ef Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Feb 2023 20:59:53 -0800
Subject: selftests/bpf: Convert test_global_funcs test to test_loader
 framework

Convert 17 test_global_funcs subtests into test_loader framework for
easier maintenance and more declarative way to define expected
failures/successes.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20230216045954.3002473-3-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/test_global_funcs.c   | 131 ++++++---------------
 .../selftests/bpf/progs/test_global_func1.c        |   6 +-
 .../selftests/bpf/progs/test_global_func10.c       |   4 +-
 .../selftests/bpf/progs/test_global_func11.c       |   4 +-
 .../selftests/bpf/progs/test_global_func12.c       |   4 +-
 .../selftests/bpf/progs/test_global_func13.c       |   4 +-
 .../selftests/bpf/progs/test_global_func14.c       |   4 +-
 .../selftests/bpf/progs/test_global_func15.c       |   4 +-
 .../selftests/bpf/progs/test_global_func16.c       |   4 +-
 .../selftests/bpf/progs/test_global_func17.c       |   4 +-
 .../selftests/bpf/progs/test_global_func2.c        |  43 ++++++-
 .../selftests/bpf/progs/test_global_func3.c        |  10 +-
 .../selftests/bpf/progs/test_global_func4.c        |  55 ++++++++-
 .../selftests/bpf/progs/test_global_func5.c        |   4 +-
 .../selftests/bpf/progs/test_global_func6.c        |   4 +-
 .../selftests/bpf/progs/test_global_func7.c        |   4 +-
 .../selftests/bpf/progs/test_global_func8.c        |   4 +-
 .../selftests/bpf/progs/test_global_func9.c        |   4 +-
 18 files changed, 174 insertions(+), 123 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 7295cc60f724..2ff4d5c7abfc 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -1,104 +1,41 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include <test_progs.h>
-
-const char *err_str;
-bool found;
-
-static int libbpf_debug_print(enum libbpf_print_level level,
-			      const char *format, va_list args)
-{
-	char *log_buf;
-
-	if (level != LIBBPF_WARN ||
-	    strcmp(format, "libbpf: \n%s\n")) {
-		vprintf(format, args);
-		return 0;
-	}
-
-	log_buf = va_arg(args, char *);
-	if (!log_buf)
-		goto out;
-	if (err_str && strstr(log_buf, err_str) == 0)
-		found = true;
-out:
-	printf(format, log_buf);
-	return 0;
-}
-
-extern int extra_prog_load_log_flags;
-
-static int check_load(const char *file)
-{
-	struct bpf_object *obj = NULL;
-	struct bpf_program *prog;
-	int err;
-
-	found = false;
-
-	obj = bpf_object__open_file(file, NULL);
-	err = libbpf_get_error(obj);
-	if (err)
-		return err;
-
-	prog = bpf_object__next_program(obj, NULL);
-	if (!prog) {
-		err = -ENOENT;
-		goto err_out;
-	}
-
-	bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
-	bpf_program__set_log_level(prog, extra_prog_load_log_flags);
-
-	err = bpf_object__load(obj);
-
-err_out:
-	bpf_object__close(obj);
-	return err;
-}
-
-struct test_def {
-	const char *file;
-	const char *err_str;
-};
+#include "test_global_func1.skel.h"
+#include "test_global_func2.skel.h"
+#include "test_global_func3.skel.h"
+#include "test_global_func4.skel.h"
+#include "test_global_func5.skel.h"
+#include "test_global_func6.skel.h"
+#include "test_global_func7.skel.h"
+#include "test_global_func8.skel.h"
+#include "test_global_func9.skel.h"
+#include "test_global_func10.skel.h"
+#include "test_global_func11.skel.h"
+#include "test_global_func12.skel.h"
+#include "test_global_func13.skel.h"
+#include "test_global_func14.skel.h"
+#include "test_global_func15.skel.h"
+#include "test_global_func16.skel.h"
+#include "test_global_func17.skel.h"
 
 void test_test_global_funcs(void)
 {
-	struct test_def tests[] = {
-		{ "test_global_func1.bpf.o", "combined stack size of 4 calls is 544" },
-		{ "test_global_func2.bpf.o" },
-		{ "test_global_func3.bpf.o", "the call stack of 8 frames" },
-		{ "test_global_func4.bpf.o" },
-		{ "test_global_func5.bpf.o", "expected pointer to ctx, but got PTR" },
-		{ "test_global_func6.bpf.o", "modified ctx ptr R2" },
-		{ "test_global_func7.bpf.o", "foo() doesn't return scalar" },
-		{ "test_global_func8.bpf.o" },
-		{ "test_global_func9.bpf.o" },
-		{ "test_global_func10.bpf.o", "invalid indirect read from stack" },
-		{ "test_global_func11.bpf.o", "Caller passes invalid args into func#1" },
-		{ "test_global_func12.bpf.o", "invalid mem access 'mem_or_null'" },
-		{ "test_global_func13.bpf.o", "Caller passes invalid args into func#1" },
-		{ "test_global_func14.bpf.o", "reference type('FWD S') size cannot be determined" },
-		{ "test_global_func15.bpf.o", "At program exit the register R0 has value" },
-		{ "test_global_func16.bpf.o", "invalid indirect read from stack" },
-		{ "test_global_func17.bpf.o", "Caller passes invalid args into func#1" },
-	};
-	libbpf_print_fn_t old_print_fn = NULL;
-	int err, i, duration = 0;
-
-	old_print_fn = libbpf_set_print(libbpf_debug_print);
-
-	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		const struct test_def *test = &tests[i];
-
-		if (!test__start_subtest(test->file))
-			continue;
-
-		err_str = test->err_str;
-		err = check_load(test->file);
-		CHECK_FAIL(!!err ^ !!err_str);
-		if (err_str)
-			CHECK(found, "", "expected string '%s'", err_str);
-	}
-	libbpf_set_print(old_print_fn);
+	RUN_TESTS(test_global_func1);
+	RUN_TESTS(test_global_func2);
+	RUN_TESTS(test_global_func3);
+	RUN_TESTS(test_global_func4);
+	RUN_TESTS(test_global_func5);
+	RUN_TESTS(test_global_func6);
+	RUN_TESTS(test_global_func7);
+	RUN_TESTS(test_global_func8);
+	RUN_TESTS(test_global_func9);
+	RUN_TESTS(test_global_func10);
+	RUN_TESTS(test_global_func11);
+	RUN_TESTS(test_global_func12);
+	RUN_TESTS(test_global_func13);
+	RUN_TESTS(test_global_func14);
+	RUN_TESTS(test_global_func15);
+	RUN_TESTS(test_global_func16);
+	RUN_TESTS(test_global_func17);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 7b42dad187b8..23970a20b324 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -3,10 +3,9 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
-#ifndef MAX_STACK
 #define MAX_STACK (512 - 3 * 32 + 8)
-#endif
 
 static __attribute__ ((noinline))
 int f0(int var, struct __sk_buff *skb)
@@ -39,7 +38,8 @@ int f3(int val, struct __sk_buff *skb, int var)
 }
 
 SEC("tc")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("combined stack size of 4 calls is 544")
+int global_func1(struct __sk_buff *skb)
 {
 	return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 97b7031d0e22..98327bdbbfd2 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 struct Small {
 	int x;
@@ -21,7 +22,8 @@ __noinline int foo(const struct Big *big)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("invalid indirect read from stack")
+int global_func10(struct __sk_buff *skb)
 {
 	const struct Small small = {.x = skb->len };
 
diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c
index ef5277d982d9..283e036dc401 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func11.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func11.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 struct S {
 	int x;
@@ -13,7 +14,8 @@ __noinline int foo(const struct S *s)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("Caller passes invalid args into func#1")
+int global_func11(struct __sk_buff *skb)
 {
 	return foo((const void *)skb);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c
index 62343527cc59..7f159d83c6f6 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func12.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func12.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 struct S {
 	int x;
@@ -13,7 +14,8 @@ __noinline int foo(const struct S *s)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("invalid mem access 'mem_or_null'")
+int global_func12(struct __sk_buff *skb)
 {
 	const struct S s = {.x = skb->len };
 
diff --git a/tools/testing/selftests/bpf/progs/test_global_func13.c b/tools/testing/selftests/bpf/progs/test_global_func13.c
index ff8897c1ac22..02ea80da75b5 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func13.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func13.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 struct S {
 	int x;
@@ -16,7 +17,8 @@ __noinline int foo(const struct S *s)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("Caller passes invalid args into func#1")
+int global_func13(struct __sk_buff *skb)
 {
 	const struct S *s = (const struct S *)(0xbedabeda);
 
diff --git a/tools/testing/selftests/bpf/progs/test_global_func14.c b/tools/testing/selftests/bpf/progs/test_global_func14.c
index 698c77199ebf..33b7d5efd7b2 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func14.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func14.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 struct S;
 
@@ -14,7 +15,8 @@ __noinline int foo(const struct S *s)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("reference type('FWD S') size cannot be determined")
+int global_func14(struct __sk_buff *skb)
 {
 
 	return foo(NULL);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c
index c19c435988d5..b512d6a6c75e 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func15.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func15.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __noinline int foo(unsigned int *v)
 {
@@ -12,7 +13,8 @@ __noinline int foo(unsigned int *v)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("At program exit the register R0 has value")
+int global_func15(struct __sk_buff *skb)
 {
 	unsigned int v = 1;
 
diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c
index 0312d1e8d8c0..e7206304632e 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func16.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func16.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __noinline int foo(int (*arr)[10])
 {
@@ -12,7 +13,8 @@ __noinline int foo(int (*arr)[10])
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("invalid indirect read from stack")
+int global_func16(struct __sk_buff *skb)
 {
 	int array[10];
 
diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c
index 2b8b9b8ba018..a32e11c7d933 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func17.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __noinline int foo(int *p)
 {
@@ -10,7 +11,8 @@ __noinline int foo(int *p)
 const volatile int i;
 
 SEC("tc")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("Caller passes invalid args into func#1")
+int global_func17(struct __sk_buff *skb)
 {
 	return foo((int *)&i);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c
index 2c18d82923a2..3dce97fb52a4 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func2.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func2.c
@@ -1,4 +1,45 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
 #define MAX_STACK (512 - 3 * 32)
-#include "test_global_func1.c"
+
+static __attribute__ ((noinline))
+int f0(int var, struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+	volatile char buf[MAX_STACK] = {};
+
+	return f0(0, skb) + skb->len;
+}
+
+int f3(int, struct __sk_buff *skb, int);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+	return f1(skb) + f3(val, skb, 1);
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+	volatile char buf[MAX_STACK] = {};
+
+	return skb->ifindex * val * var;
+}
+
+SEC("tc")
+__success
+int global_func2(struct __sk_buff *skb)
+{
+	return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
index 01bf8275dfd6..142b682d3c2f 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func3.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func3.c
@@ -3,6 +3,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __attribute__ ((noinline))
 int f1(struct __sk_buff *skb)
@@ -46,20 +47,15 @@ int f7(struct __sk_buff *skb)
 	return f6(skb);
 }
 
-#ifndef NO_FN8
 __attribute__ ((noinline))
 int f8(struct __sk_buff *skb)
 {
 	return f7(skb);
 }
-#endif
 
 SEC("tc")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("the call stack of 8 frames")
+int global_func3(struct __sk_buff *skb)
 {
-#ifndef NO_FN8
 	return f8(skb);
-#else
-	return f7(skb);
-#endif
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c b/tools/testing/selftests/bpf/progs/test_global_func4.c
index 610f75edf276..1733d87ad3f3 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func4.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func4.c
@@ -1,4 +1,55 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2020 Facebook */
-#define NO_FN8
-#include "test_global_func3.c"
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+	return f1(skb) + val;
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+	return f2(var, skb) + val;
+}
+
+__attribute__ ((noinline))
+int f4(struct __sk_buff *skb)
+{
+	return f3(1, skb, 2);
+}
+
+__attribute__ ((noinline))
+int f5(struct __sk_buff *skb)
+{
+	return f4(skb);
+}
+
+__attribute__ ((noinline))
+int f6(struct __sk_buff *skb)
+{
+	return f5(skb);
+}
+
+__attribute__ ((noinline))
+int f7(struct __sk_buff *skb)
+{
+	return f6(skb);
+}
+
+SEC("tc")
+__success
+int global_func4(struct __sk_buff *skb)
+{
+	return f7(skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c
index 9248d03e0d06..cc55aedaf82d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func5.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func5.c
@@ -3,6 +3,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __attribute__ ((noinline))
 int f1(struct __sk_buff *skb)
@@ -25,7 +26,8 @@ int f3(int val, struct __sk_buff *skb)
 }
 
 SEC("tc")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("expected pointer to ctx, but got PTR")
+int global_func5(struct __sk_buff *skb)
 {
 	return f1(skb) + f2(2, skb) + f3(3, skb);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c
index af8c78bdfb25..46c38c8f2cf0 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func6.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func6.c
@@ -3,6 +3,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __attribute__ ((noinline))
 int f1(struct __sk_buff *skb)
@@ -25,7 +26,8 @@ int f3(int val, struct __sk_buff *skb)
 }
 
 SEC("tc")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("modified ctx ptr R2")
+int global_func6(struct __sk_buff *skb)
 {
 	return f1(skb) + f2(2, skb) + f3(3, skb);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
index 6cb8e2f5254c..f182febfde3c 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func7.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func7.c
@@ -3,6 +3,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __attribute__ ((noinline))
 void foo(struct __sk_buff *skb)
@@ -11,7 +12,8 @@ void foo(struct __sk_buff *skb)
 }
 
 SEC("tc")
-int test_cls(struct __sk_buff *skb)
+__failure __msg("foo() doesn't return scalar")
+int global_func7(struct __sk_buff *skb)
 {
 	foo(skb);
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c
index d55a6544b1ab..9b9c57fa2dd3 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func8.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func8.c
@@ -3,6 +3,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 __noinline int foo(struct __sk_buff *skb)
 {
@@ -10,7 +11,8 @@ __noinline int foo(struct __sk_buff *skb)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__success
+int global_func8(struct __sk_buff *skb)
 {
 	if (!foo(skb))
 		return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func9.c b/tools/testing/selftests/bpf/progs/test_global_func9.c
index bd233ddede98..1f2cb0159b8d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func9.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func9.c
@@ -2,6 +2,7 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
 
 struct S {
 	int x;
@@ -74,7 +75,8 @@ __noinline int quuz(int **p)
 }
 
 SEC("cgroup_skb/ingress")
-int test_cls(struct __sk_buff *skb)
+__success
+int global_func9(struct __sk_buff *skb)
 {
 	int result = 0;
 
-- 
cgit v1.2.3


From e2b5cfc978f871996d1f8667515c0e06b33e620e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Feb 2023 20:59:54 -0800
Subject: selftests/bpf: Add global subprog context passing tests

Add tests validating that it's possible to pass context arguments into
global subprogs for various types of programs, including a particularly
tricky KPROBE programs (which cover kprobes, uprobes, USDTs, a vast and
important class of programs).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20230216045954.3002473-4-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/test_global_funcs.c   |   2 +
 .../bpf/progs/test_global_func_ctx_args.c          | 104 +++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 2ff4d5c7abfc..e0879df38639 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -18,6 +18,7 @@
 #include "test_global_func15.skel.h"
 #include "test_global_func16.skel.h"
 #include "test_global_func17.skel.h"
+#include "test_global_func_ctx_args.skel.h"
 
 void test_test_global_funcs(void)
 {
@@ -38,4 +39,5 @@ void test_test_global_funcs(void)
 	RUN_TESTS(test_global_func15);
 	RUN_TESTS(test_global_func16);
 	RUN_TESTS(test_global_func17);
+	RUN_TESTS(test_global_func_ctx_args);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
new file mode 100644
index 000000000000..7faa8eef0598
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+static long stack[256];
+
+/*
+ * KPROBE contexts
+ */
+
+__weak int kprobe_typedef_ctx_subprog(bpf_user_pt_regs_t *ctx)
+{
+	return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_typedef_ctx(void *ctx)
+{
+	return kprobe_typedef_ctx_subprog(ctx);
+}
+
+#define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL)))
+
+__weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx)
+{
+	return bpf_get_stack((void *)ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_resolved_ctx(void *ctx)
+{
+	return kprobe_struct_ctx_subprog(ctx);
+}
+
+/* this is current hack to make this work on old kernels */
+struct bpf_user_pt_regs_t {};
+
+__weak int kprobe_workaround_ctx_subprog(struct bpf_user_pt_regs_t *ctx)
+{
+	return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_workaround_ctx(void *ctx)
+{
+	return kprobe_workaround_ctx_subprog(ctx);
+}
+
+/*
+ * RAW_TRACEPOINT contexts
+ */
+
+__weak int raw_tp_ctx_subprog(struct bpf_raw_tracepoint_args *ctx)
+{
+	return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success
+int raw_tp_ctx(void *ctx)
+{
+	return raw_tp_ctx_subprog(ctx);
+}
+
+/*
+ * RAW_TRACEPOINT_WRITABLE contexts
+ */
+
+__weak int raw_tp_writable_ctx_subprog(struct bpf_raw_tracepoint_args *ctx)
+{
+	return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success
+int raw_tp_writable_ctx(void *ctx)
+{
+	return raw_tp_writable_ctx_subprog(ctx);
+}
+
+/*
+ * PERF_EVENT contexts
+ */
+
+__weak int perf_event_ctx_subprog(struct bpf_perf_event_data *ctx)
+{
+	return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?perf_event")
+__success
+int perf_event_ctx(void *ctx)
+{
+	return perf_event_ctx_subprog(ctx);
+}
-- 
cgit v1.2.3


From 181127fb76e62d06ab17a75fd610129688612343 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Fri, 17 Feb 2023 12:13:09 -0800
Subject: Revert "bpf, test_run: fix &xdp_frame misplacement for LIVE_FRAMES"

This reverts commit 6c20822fada1b8adb77fa450d03a0d449686a4a9.

build bot failed on arch with different cache line size:
https://lore.kernel.org/bpf/50c35055-afa9-d01e-9a05-ea5351280e4f@intel.com/

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 net/bpf/test_run.c                                 | 29 +++++-----------------
 .../selftests/bpf/prog_tests/xdp_do_redirect.c     |  7 +++---
 2 files changed, 9 insertions(+), 27 deletions(-)

(limited to 'tools/testing')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 982e81bba6cf..6f3d654b3339 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -97,11 +97,8 @@ reset:
 struct xdp_page_head {
 	struct xdp_buff orig_ctx;
 	struct xdp_buff ctx;
-	union {
-		/* ::data_hard_start starts here */
-		DECLARE_FLEX_ARRAY(struct xdp_frame, frame);
-		DECLARE_FLEX_ARRAY(u8, data);
-	};
+	struct xdp_frame frm;
+	u8 data[];
 };
 
 struct xdp_test_data {
@@ -119,20 +116,6 @@ struct xdp_test_data {
 #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
 #define TEST_XDP_MAX_BATCH 256
 
-#if BITS_PER_LONG == 64 && PAGE_SIZE == SZ_4K
-/* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE
- * must be updated accordingly when any of these changes, otherwise BPF
- * selftests will fail.
- */
-#ifdef __s390x__
-#define TEST_MAX_PKT_SIZE 3216
-#else
-#define TEST_MAX_PKT_SIZE 3408
-#endif
-static_assert(SKB_WITH_OVERHEAD(TEST_XDP_FRAME_SIZE - XDP_PACKET_HEADROOM) ==
-	      TEST_MAX_PKT_SIZE);
-#endif
-
 static void xdp_test_run_init_page(struct page *page, void *arg)
 {
 	struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
@@ -149,8 +132,8 @@ static void xdp_test_run_init_page(struct page *page, void *arg)
 	headroom -= meta_len;
 
 	new_ctx = &head->ctx;
-	frm = head->frame;
-	data = head->data;
+	frm = &head->frm;
+	data = &head->data;
 	memcpy(data + headroom, orig_ctx->data_meta, frm_len);
 
 	xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq);
@@ -240,7 +223,7 @@ static void reset_ctx(struct xdp_page_head *head)
 	head->ctx.data = head->orig_ctx.data;
 	head->ctx.data_meta = head->orig_ctx.data_meta;
 	head->ctx.data_end = head->orig_ctx.data_end;
-	xdp_update_frame_from_buff(&head->ctx, head->frame);
+	xdp_update_frame_from_buff(&head->ctx, &head->frm);
 }
 
 static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
@@ -302,7 +285,7 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog,
 		head = phys_to_virt(page_to_phys(page));
 		reset_ctx(head);
 		ctx = &head->ctx;
-		frm = head->frame;
+		frm = &head->frm;
 		xdp->frame_cnt++;
 
 		act = bpf_prog_run_xdp(prog, ctx);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 7271a18ab3e2..2666c84dbd01 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -65,13 +65,12 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
 }
 
 /* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
- * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM =
- * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one.
+ * sizeof(struct skb_shared_info) - XDP_PACKET_HEADROOM = 3368 bytes
  */
 #if defined(__s390x__)
-#define MAX_PKT_SIZE 3216
+#define MAX_PKT_SIZE 3176
 #else
-#define MAX_PKT_SIZE 3408
+#define MAX_PKT_SIZE 3368
 #endif
 static void test_max_pkt_size(int fd)
 {
-- 
cgit v1.2.3


From 168de0233586fb06c5c5c56304aa9a928a09b0ba Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Fri, 17 Feb 2023 12:55:15 -0800
Subject: selftests/bpf: Add bpf_fib_lookup test

This patch tests the bpf_fib_lookup helper when looking up
a neigh in NUD_FAILED and NUD_STALE state. It also adds test
for the new BPF_FIB_LOOKUP_SKIP_NEIGH flag.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230217205515.3583372-2-martin.lau@linux.dev
---
 .../testing/selftests/bpf/prog_tests/fib_lookup.c  | 187 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/fib_lookup.c     |  22 +++
 2 files changed, 209 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/fib_lookup.c
 create mode 100644 tools/testing/selftests/bpf/progs/fib_lookup.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
new file mode 100644
index 000000000000..61ccddccf485
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "fib_lookup.skel.h"
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto fail;				\
+	})
+
+#define NS_TEST			"fib_lookup_ns"
+#define IPV6_IFACE_ADDR		"face::face"
+#define IPV6_NUD_FAILED_ADDR	"face::1"
+#define IPV6_NUD_STALE_ADDR	"face::2"
+#define IPV4_IFACE_ADDR		"10.0.0.254"
+#define IPV4_NUD_FAILED_ADDR	"10.0.0.1"
+#define IPV4_NUD_STALE_ADDR	"10.0.0.2"
+#define DMAC			"11:11:11:11:11:11"
+#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
+
+struct fib_lookup_test {
+	const char *desc;
+	const char *daddr;
+	int expected_ret;
+	int lookup_flags;
+	__u8 dmac[6];
+};
+
+static const struct fib_lookup_test tests[] = {
+	{ .desc = "IPv6 failed neigh",
+	  .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, },
+	{ .desc = "IPv6 stale neigh",
+	  .daddr = IPV6_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .dmac = DMAC_INIT, },
+	{ .desc = "IPv6 skip neigh",
+	  .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	{ .desc = "IPv4 failed neigh",
+	  .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, },
+	{ .desc = "IPv4 stale neigh",
+	  .daddr = IPV4_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .dmac = DMAC_INIT, },
+	{ .desc = "IPv4 skip neigh",
+	  .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+};
+
+static int ifindex;
+
+static int setup_netns(void)
+{
+	int err;
+
+	SYS("ip link add veth1 type veth peer name veth2");
+	SYS("ip link set dev veth1 up");
+
+	SYS("ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
+	SYS("ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
+	SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
+
+	SYS("ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR);
+	SYS("ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
+	SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+
+	err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
+	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
+		goto fail;
+
+	err = write_sysctl("/proc/sys/net/ipv6/conf/veth1/forwarding", "1");
+	if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
+		goto fail;
+
+	return 0;
+fail:
+	return -1;
+}
+
+static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
+{
+	int ret;
+
+	memset(params, 0, sizeof(*params));
+
+	params->l4_protocol = IPPROTO_TCP;
+	params->ifindex = ifindex;
+
+	if (inet_pton(AF_INET6, daddr, params->ipv6_dst) == 1) {
+		params->family = AF_INET6;
+		ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
+		if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
+			return -1;
+		return 0;
+	}
+
+	ret = inet_pton(AF_INET, daddr, &params->ipv4_dst);
+	if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
+		return -1;
+	params->family = AF_INET;
+	ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
+	if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
+		return -1;
+
+	return 0;
+}
+
+static void mac_str(char *b, const __u8 *mac)
+{
+	sprintf(b, "%02X:%02X:%02X:%02X:%02X:%02X",
+		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+}
+
+void test_fib_lookup(void)
+{
+	struct bpf_fib_lookup *fib_params;
+	struct nstoken *nstoken = NULL;
+	struct __sk_buff skb = { };
+	struct fib_lookup *skel;
+	int prog_fd, err, ret, i;
+
+	/* The test does not use the skb->data, so
+	 * use pkt_v6 for both v6 and v4 test.
+	 */
+	LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+		    .data_in = &pkt_v6,
+		    .data_size_in = sizeof(pkt_v6),
+		    .ctx_in = &skb,
+		    .ctx_size_in = sizeof(skb),
+	);
+
+	skel = fib_lookup__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+		return;
+	prog_fd = bpf_program__fd(skel->progs.fib_lookup);
+
+	SYS("ip netns add %s", NS_TEST);
+
+	nstoken = open_netns(NS_TEST);
+	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+		goto fail;
+
+	if (setup_netns())
+		goto fail;
+
+	ifindex = if_nametoindex("veth1");
+	skb.ifindex = ifindex;
+	fib_params = &skel->bss->fib_params;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		printf("Testing %s\n", tests[i].desc);
+
+		if (set_lookup_params(fib_params, tests[i].daddr))
+			continue;
+		skel->bss->fib_lookup_ret = -1;
+		skel->bss->lookup_flags = BPF_FIB_LOOKUP_OUTPUT |
+			tests[i].lookup_flags;
+
+		err = bpf_prog_test_run_opts(prog_fd, &run_opts);
+		if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+			continue;
+
+		ASSERT_EQ(tests[i].expected_ret, skel->bss->fib_lookup_ret,
+			  "fib_lookup_ret");
+
+		ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
+		if (!ASSERT_EQ(ret, 0, "dmac not match")) {
+			char expected[18], actual[18];
+
+			mac_str(expected, tests[i].dmac);
+			mac_str(actual, fib_params->dmac);
+			printf("dmac expected %s actual %s\n", expected, actual);
+		}
+	}
+
+fail:
+	if (nstoken)
+		close_netns(nstoken);
+	system("ip netns del " NS_TEST " &> /dev/null");
+	fib_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/fib_lookup.c b/tools/testing/selftests/bpf/progs/fib_lookup.c
new file mode 100644
index 000000000000..c4514dd58c62
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fib_lookup.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tracing_net.h"
+
+struct bpf_fib_lookup fib_params = {};
+int fib_lookup_ret = 0;
+int lookup_flags = 0;
+
+SEC("tc")
+int fib_lookup(struct __sk_buff *skb)
+{
+	fib_lookup_ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params),
+					lookup_flags);
+
+	return TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 436864095a95fcc611c20c44a111985fa9848730 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 16 Feb 2023 13:43:40 +0100
Subject: selftests/net: Interpret UDP_GRO cmsg data as an int value

Data passed to user-space with a (SOL_UDP, UDP_GRO) cmsg carries an
int (see udp_cmsg_recv), not a u16 value, as strace confirms:

  recvmsg(8, {msg_name=...,
              msg_iov=[{iov_base="\0\0..."..., iov_len=96000}],
              msg_iovlen=1,
              msg_control=[{cmsg_len=20,         <-- sizeof(cmsghdr) + 4
                            cmsg_level=SOL_UDP,
                            cmsg_type=0x68}],    <-- UDP_GRO
                            msg_controllen=24,
                            msg_flags=0}, 0) = 11200

Interpreting the data as an u16 value won't work on big-endian platforms.
Since it is too late to back out of this API decision [1], fix the test.

[1]: https://lore.kernel.org/netdev/20230131174601.203127-1-jakub@cloudflare.com/

Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO")
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgso_bench_rx.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index 4058c7451e70..f35a924d4a30 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -214,11 +214,10 @@ static void do_verify_udp(const char *data, int len)
 
 static int recv_msg(int fd, char *buf, int len, int *gso_size)
 {
-	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+	char control[CMSG_SPACE(sizeof(int))] = {0};
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
 	struct cmsghdr *cmsg;
-	uint16_t *gsosizeptr;
 	int ret;
 
 	iov.iov_base = buf;
@@ -237,8 +236,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
 		     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 			if (cmsg->cmsg_level == SOL_UDP
 			    && cmsg->cmsg_type == UDP_GRO) {
-				gsosizeptr = (uint16_t *) CMSG_DATA(cmsg);
-				*gso_size = *gsosizeptr;
+				*gso_size = *(int *)CMSG_DATA(cmsg);
 				break;
 			}
 		}
-- 
cgit v1.2.3


From 3a7d84eae03bef4c02c39822b2ea6be5ac73de7b Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 17 Feb 2023 13:28:50 +0100
Subject: self-tests: more rps self tests

Explicitly check for child netns and main ns independency

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rps_default_mask.sh | 41 +++++++++++++++++--------
 1 file changed, 29 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index c81c0ac7ddfe..0fd0d2db3abc 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -8,7 +8,9 @@ ret=0
 [ $cpus -gt 2 ] || exit $ksft_skip
 
 readonly INITIAL_RPS_DEFAULT_MASK=$(cat /proc/sys/net/core/rps_default_mask)
-readonly NETNS="ns-$(mktemp -u XXXXXX)"
+readonly TAG="$(mktemp -u XXXXXX)"
+readonly VETH="veth${TAG}"
+readonly NETNS="ns-${TAG}"
 
 setup() {
 	ip netns add "${NETNS}"
@@ -21,11 +23,15 @@ cleanup() {
 }
 
 chk_rps() {
-	local rps_mask expected_rps_mask=$3
-	local dev_name=$2
+	local rps_mask expected_rps_mask=$4
+	local dev_name=$3
+	local netns=$2
+	local cmd="cat"
 	local msg=$1
 
-	rps_mask=$(ip netns exec $NETNS cat /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
+	[ -n "$netns" ] && cmd="ip netns exec $netns $cmd"
+
+	rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
 	printf "%-60s" "$msg"
 	if [ $rps_mask -eq $expected_rps_mask ]; then
 		echo "[ ok ]"
@@ -39,19 +45,30 @@ trap cleanup EXIT
 
 echo 0 > /proc/sys/net/core/rps_default_mask
 setup
-chk_rps "empty rps_default_mask" lo 0
+chk_rps "empty rps_default_mask" $NETNS lo 0
 cleanup
 
 echo 1 > /proc/sys/net/core/rps_default_mask
 setup
-chk_rps "non zero rps_default_mask" lo 1
+chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
 
 echo 3 > /proc/sys/net/core/rps_default_mask
-chk_rps "changing rps_default_mask dont affect existing netns" lo 1
+chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+ip link set dev $VETH up
+ip -n $NETNS link set dev $VETH up
+chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+ip netns del $NETNS
+
+setup
+chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
+
+ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
+chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
 
-ip -n $NETNS link add type veth
-ip -n $NETNS link set dev veth0 up
-ip -n $NETNS link set dev veth1 up
-chk_rps "changing rps_default_mask affect newly created devices" veth0 3
-chk_rps "changing rps_default_mask affect newly created devices[II]" veth1 3
 exit $ret
-- 
cgit v1.2.3


From b60417a9f2b890a8094477b2204d4f73c535725e Mon Sep 17 00:00:00 2001
From: Roxana Nicolescu <roxana.nicolescu@canonical.com>
Date: Mon, 20 Feb 2023 12:04:00 +0100
Subject: selftest: fib_tests: Always cleanup before exit

Usage of `set -e` before executing a command causes immediate exit
on failure, without cleanup up the resources allocated at setup.
This can affect the next tests that use the same resources,
leading to a chain of failures.

A simple fix is to always call cleanup function when the script exists.
This approach is already used by other existing tests.

Fixes: 1056691b2680 ("selftests: fib_tests: Make test results more verbose")
Signed-off-by: Roxana Nicolescu <roxana.nicolescu@canonical.com>
Link: https://lore.kernel.org/r/20230220110400.26737-2-roxana.nicolescu@canonical.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/fib_tests.sh | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 5637b5dadabd..70ea8798b1f6 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -2065,6 +2065,8 @@ EOF
 ################################################################################
 # main
 
+trap cleanup EXIT
+
 while getopts :t:pPhv o
 do
 	case $o in
-- 
cgit v1.2.3


From f922c7b1c1c45740d329bf248936fdb78c0cff6e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Mon, 20 Feb 2023 14:23:36 +0100
Subject: sefltests: netdevsim: wait for devlink instance after netns removal

When devlink instance is put into network namespace and that network
namespace gets deleted, devlink instance is moved back into init_ns.
This is done as a part of cleanup_net() routine. Since cleanup_net()
is called asynchronously from workqueue, there is no guarantee that
the devlink instance move is done after "ip netns del" returns.

So fix this race by making sure that the devlink instance is present
before any other operation.

Reported-by: Amir Tzin <amirtz@nvidia.com>
Fixes: b74c37fd35a2 ("selftests: netdevsim: add tests for devlink reload with resources")
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Link: https://lore.kernel.org/r/20230220132336.198597-1-jiri@resnulli.us
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../testing/selftests/drivers/net/netdevsim/devlink.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index a08c02abde12..7f7d20f22207 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -17,6 +17,18 @@ SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
 DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
 DL_HANDLE=netdevsim/$DEV_NAME
 
+wait_for_devlink()
+{
+	"$@" | grep -q $DL_HANDLE
+}
+
+devlink_wait()
+{
+	local timeout=$1
+
+	busywait "$timeout" wait_for_devlink devlink dev
+}
+
 fw_flash_test()
 {
 	RET=0
@@ -256,6 +268,9 @@ netns_reload_test()
 	ip netns del testns2
 	ip netns del testns1
 
+	# Wait until netns async cleanup is done.
+	devlink_wait 2000
+
 	log_test "netns reload test"
 }
 
@@ -348,6 +363,9 @@ resource_test()
 	ip netns del testns2
 	ip netns del testns1
 
+	# Wait until netns async cleanup is done.
+	devlink_wait 2000
+
 	log_test "resource test"
 }
 
-- 
cgit v1.2.3