From 115d9d77bb0f9152c60b6e8646369fa7f6167593 Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Fri, 6 Jan 2023 22:22:44 +0000 Subject: mm: Always release pages to the buddy allocator in memblock_free_late(). If CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, memblock_free_pages() only releases pages to the buddy allocator if they are not in the deferred range. This is correct for free pages (as defined by for_each_free_mem_pfn_range_in_zone()) because free pages in the deferred range will be initialized and released as part of the deferred init process. memblock_free_pages() is called by memblock_free_late(), which is used to free reserved ranges after memblock_free_all() has run. All pages in reserved ranges have been initialized at that point, and accordingly, those pages are not touched by the deferred init process. This means that currently, if the pages that memblock_free_late() intends to release are in the deferred range, they will never be released to the buddy allocator. They will forever be reserved. In addition, memblock_free_pages() calls kmsan_memblock_free_pages(), which is also correct for free pages but is not correct for reserved pages. KMSAN metadata for reserved pages is initialized by kmsan_init_shadow(), which runs shortly before memblock_free_all(). For both of these reasons, memblock_free_pages() should only be called for free pages, and memblock_free_late() should call __free_pages_core() directly instead. One case where this issue can occur in the wild is EFI boot on x86_64. The x86 EFI code reserves all EFI boot services memory ranges via memblock_reserve() and frees them later via memblock_free_late() (efi_reserve_boot_services() and efi_free_boot_services(), respectively). If any of those ranges happens to fall within the deferred init range, the pages will not be released and that memory will be unavailable. For example, on an Amazon EC2 t3.micro VM (1 GB) booting via EFI: v6.2-rc2: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 178867 v6.2-rc2 + patch: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 222816 # +43,949 pages Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/01010185892de53e-e379acfb-7044-4b24-b30a-e2657c1ba989-000000@us-west-2.amazonses.com Signed-off-by: Mike Rapoport (IBM) --- tools/testing/memblock/internal.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..85973e55489e 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,6 +15,10 @@ bool mirrored_kernelcore = false; struct page {}; +void __free_pages_core(struct page *page, unsigned int order) +{ +} + void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { -- cgit v1.2.3 From 7d6ceeb1875cc08dc3d1e558e191434d94840cd5 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Sat, 7 Jan 2023 04:40:20 +0100 Subject: af_unix: selftest: Fix the size of the parameter to connect() Adjust size parameter in connect() to match the type of the parameter, to fix "No such file or directory" error in selftests/net/af_unix/ test_oob_unix.c:127. The existing code happens to work provided that the autogenerated pathname is shorter than sizeof (struct sockaddr), which is why it hasn't been noticed earlier. Visible from the trace excerpt: bind(3, {sa_family=AF_UNIX, sun_path="unix_oob_453059"}, 110) = 0 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fa6a6577a10) = 453060 [pid ] connect(6, {sa_family=AF_UNIX, sun_path="unix_oob_45305"}, 16) = -1 ENOENT (No such file or directory) BUG: The filename is trimmed to sizeof (struct sockaddr). Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Shuah Khan Cc: Kuniyuki Iwashima Cc: Florian Westphal Reviewed-by: Florian Westphal Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- tools/testing/selftests/net/af_unix/test_unix_oob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f2..532459a15067 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); -- cgit v1.2.3 From e59370b2e96eb8e7e057a2a16e999ff385a3f2fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:41 +0100 Subject: selftests/net: l2_tos_ttl_inherit.sh: Set IPv6 addresses with "nodad". The ping command can run before DAD completes. In that case, ping may fail and break the selftest. We don't need DAD here since we're working on isolated device pairs. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/l2_tos_ttl_inherit.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a8..e2574b08eabc 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -137,8 +137,8 @@ setup() { if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 + ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad + $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad ip link add name tep0 type $type $local_addr1 \ remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ $vxlan $geneve @@ -170,8 +170,8 @@ setup() { ip addr add 198.19.0.1/24 brd + dev ${parent}0 $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad + $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad fi } -- cgit v1.2.3 From c53cb00f7983a5474f2d36967f84908b85af9159 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:46 +0100 Subject: selftests/net: l2_tos_ttl_inherit.sh: Run tests in their own netns. This selftest currently runs half in the current namespace and half in a netns of its own. Therefore, the test can fail if the current namespace is already configured with incompatible parameters (for example if it already has a veth0 interface). Adapt the script to put both ends of the veth pair in their own netns. Now veth0 is created in NS0 instead of the current namespace, while veth1 is set up in NS1 (instead of the 'testing' netns). The user visible netns names are randomised to minimise the risk of conflicts with already existing namespaces. The cleanup() function doesn't need to remove the virtual interface anymore: deleting NS0 and NS1 automatically removes the virtual interfaces they contained. We can remove $ns, which was only used to run ip commands in the 'testing' netns (let's use the builtin "-netns" option instead). However, we still need a similar functionality as ping and tcpdump now need to run in NS0. So we now have $RUN_NS0 for that. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/l2_tos_ttl_inherit.sh | 162 +++++++++++++--------- 1 file changed, 93 insertions(+), 69 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index e2574b08eabc..cf56680d598f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -25,6 +25,11 @@ expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +66,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. @@ -94,14 +98,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +132,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - $vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +205,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +229,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +246,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +267,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +292,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +309,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,11 +330,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid @@ -351,9 +376,8 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } printf "┌────────┬───────┬───────┬──────────────┬" -- cgit v1.2.3 From d68ff8ad3351b8fc8d6f14b9a4f5cc8ba3e8bd13 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:50 +0100 Subject: selftests/net: l2_tos_ttl_inherit.sh: Ensure environment cleanup on failure. Use 'set -e' and an exit handler to stop the script if a command fails and ensure the test environment is cleaned up in any case. Also, handle the case where the script is interrupted by SIGINT. The only command that's expected to fail is 'wait $ping_pid', since it's killed by the script. Handle this case with '|| true' to make it play well with 'set -e'. Finally, return the Kselftest SKIP code (4) when the script breaks because of an environment problem or a command line failure. The 0 and 1 return codes should now reliably indicate that all tests have been run (0: all tests run and passed, 1: all tests run but at least one failed, 4: test script didn't run completely). Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/l2_tos_ttl_inherit.sh | 40 ++++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index cf56680d598f..f11756e7df2f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,13 +12,16 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" @@ -340,7 +343,7 @@ verify() { fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -380,6 +383,31 @@ cleanup() { ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -409,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi -- cgit v1.2.3 From c273289fac370b6488757236cd62cc2cf04830b7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Jan 2023 12:54:42 +0100 Subject: selftests: netfilter: fix transaction test script timeout handling The kselftest framework uses a default timeout of 45 seconds for all test scripts. Increase the timeout to two minutes for the netfilter tests, this should hopefully be enough, Make sure that, should the script be canceled, the net namespace and the spawned ping instances are removed. Fixes: 25d8bcedbf43 ("selftests: add script to stress-test nft packet path vs. control plane") Reported-by: Mirsad Goran Todorovac Signed-off-by: Florian Westphal Tested-by: Mirsad Goran Todorovac Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_trans_stress.sh | 16 +++++++++------- tools/testing/selftests/netfilter/settings | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/netfilter/settings (limited to 'tools/testing') diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh index a7f62ad4f661..2ffba45a78bf 100755 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -10,12 +10,20 @@ ksft_skip=4 testns=testns-$(mktemp -u "XXXXXXXX") +tmp="" tables="foo bar baz quux" global_ret=0 eret=0 lret=0 +cleanup() { + ip netns pids "$testns" | xargs kill 2>/dev/null + ip netns del "$testns" + + rm -f "$tmp" +} + check_result() { local r=$1 @@ -43,6 +51,7 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +trap cleanup EXIT tmp=$(mktemp) for table in $tables; do @@ -139,11 +148,4 @@ done check_result $lret "add/delete with nftrace enabled" -pkill -9 ping - -wait - -rm -f "$tmp" -ip netns del "$testns" - exit $global_ret diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/netfilter/settings @@ -0,0 +1 @@ +timeout=120 -- cgit v1.2.3 From 5316a017d093f644675a56523bcf5787ba8f4fef Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 6 Jan 2023 22:30:14 +0300 Subject: proc: fix PIE proc-empty-vm, proc-pid-vm tests vsyscall detection code uses direct call to the beginning of the vsyscall page: asm ("call %P0" :: "i" (0xffffffffff600000)) It generates "call rel32" instruction but it is not relocated if binary is PIE, so binary segfaults into random userspace address and vsyscall page status is detected incorrectly. Do more direct: asm ("call *%rax") which doesn't do need any relocaltions. Mark g_vsyscall as volatile for a good measure, I didn't find instruction setting it to 0. Now the code is obviously correct: xor eax, eax mov rdi, rbp mov rsi, rbp mov DWORD PTR [rip+0x2d15], eax # g_vsyscall = 0 mov rax, 0xffffffffff600000 call rax mov DWORD PTR [rip+0x2d02], 1 # g_vsyscall = 1 mov eax, DWORD PTR ds:0xffffffffff600000 mov DWORD PTR [rip+0x2cf1], 2 # g_vsyscall = 2 mov edi, [rip+0x2ceb] # exit(g_vsyscall) call exit Note: fixed proc-empty-vm test oopses 5.19.0-28-generic kernel but this is separate story. Link: https://lkml.kernel.org/r/Y7h2xvzKLg36DSq8@p183 Fixes: 5bc73bb3451b9 ("proc: test how it holds up with mapping'less process") Signed-off-by: Alexey Dobriyan Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-empty-vm.c | 12 +++++++----- tools/testing/selftests/proc/proc-pid-vm.c | 9 +++++---- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index d95b1cb43d9d..7588428b8fcd 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -25,6 +25,7 @@ #undef NDEBUG #include #include +#include #include #include #include @@ -41,7 +42,7 @@ * 1: vsyscall VMA is --xp vsyscall=xonly * 2: vsyscall VMA is r-xp vsyscall=emulate */ -static int g_vsyscall; +static volatile int g_vsyscall; static const char *g_proc_pid_maps_vsyscall; static const char *g_proc_pid_smaps_vsyscall; @@ -147,11 +148,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 69551bfa215c..cacbd2a4aec9 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -257,11 +257,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; -- cgit v1.2.3 From 9fdaca2c1e157dc0a3c0faecf3a6a68e7d8d0c7b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 Jan 2023 22:33:07 +0000 Subject: kselftest: Fix error message for unconfigured LLVM builds We are missing a ) when we attempt to complain about not having enough configuration for clang, resulting in the rather inscrutable error: ../lib.mk:23: *** unterminated call to function 'error': missing ')'. Stop. Add the required ) so we print the message we were trying to print. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 291144c284fb..f7900e75d230 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,7 +20,7 @@ CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) ifeq ($(CROSS_COMPILE),) ifeq ($(CLANG_TARGET_FLAGS),) -$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk +$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk) else CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) endif # CLANG_TARGET_FLAGS -- cgit v1.2.3 From 4656d72c1efa495a58ad6d8b073a60907073e4e6 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 12 Jan 2023 18:42:53 +0100 Subject: selftests: mptcp: userspace: validate v4-v6 subflows mix MPTCP protocol supports having subflows in both IPv4 and IPv6. In Linux, it is possible to have that if the MPTCP socket has been created with AF_INET6 family without the IPV6_V6ONLY option. Here, a new IPv4 subflow is being added to the initial IPv6 connection, then being removed using Netlink commands. Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 47 +++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index a29deb9fa024..ab2d581f28a1 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -752,6 +752,52 @@ test_subflows() "$server4_token" > /dev/null 2>&1 } +test_subflows_v4_v6_mix() +{ + # Attempt to add a listener at 10.0.2.1: + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app6_port > /dev/null 2>&1 & + local listener_pid=$! + + # ADD_ADDR4 from server to client machine reusing the subflow port on + # the established v6 connection + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 > /dev/null 2>&1 + stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ + "$server_addr_id" "$app6_port" + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + kill_wait $listener_pid + + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" > /dev/null 2>&1 + sleep 0.5 +} + test_prio() { local count @@ -861,6 +907,7 @@ make_connection "v6" test_announce test_remove test_subflows +test_subflows_v4_v6_mix test_prio test_listener -- cgit v1.2.3 From 0ca2c535f5a07f01118a6a70bfab78576e02fcae Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 16 Jan 2023 11:33:07 -0500 Subject: selftests/vm: remove __USE_GNU in hugetlb-madvise.c __USE_GNU should be an internal macro only used inside glibc. Either memfd_create() or fallocate() requires _GNU_SOURCE per man page, where __USE_GNU will further be defined by glibc headers include/features.h: #ifdef _GNU_SOURCE # define __USE_GNU 1 #endif This fixes: >> hugetlb-madvise.c:20: warning: "__USE_GNU" redefined 20 | #define __USE_GNU | In file included from /usr/include/x86_64-linux-gnu/bits/libc-header-start.h:33, from /usr/include/stdlib.h:26, from hugetlb-madvise.c:16: /usr/include/features.h:407: note: this is the location of the previous definition 407 | # define __USE_GNU 1 | Link: https://lkml.kernel.org/r/Y8V9z+z6Tk7NetI3@x1n Signed-off-by: Peter Xu Reported-by: kernel test robot Cc: Mike Kravetz Signed-off-by: Andrew Morton --- tools/testing/selftests/vm/hugetlb-madvise.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c index a634f47d1e56..9a127a8fe176 100644 --- a/tools/testing/selftests/vm/hugetlb-madvise.c +++ b/tools/testing/selftests/vm/hugetlb-madvise.c @@ -17,7 +17,6 @@ #include #include #include -#define __USE_GNU #include #define MIN_FREE_PAGES 20 -- cgit v1.2.3 From 903848249a781d76d59561d51676c95b3a4d7162 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 18 Jan 2023 10:18:47 -0500 Subject: selftests/net: toeplitz: fix race on tpacket_v3 block close Avoid race between process wakeup and tpacket_v3 block timeout. The test waits for cfg_timeout_msec for packets to arrive. Packets arrive in tpacket_v3 rings, which pass packets ("frames") to the process in batches ("blocks"). The sk waits for req3.tp_retire_blk_tov msec to release a block. Set the block timeout lower than the process waiting time, else the process may find that no block has been released by the time it scans the socket list. Convert to a ring of more than one, smaller, blocks with shorter timeouts. Blocks must be page aligned, so >= 64KB. Fixes: 5ebfb4cc3048 ("selftests/net: toeplitz test") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230118151847.4124260-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/toeplitz.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c index 90026a27eac0..9ba03164d73a 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c @@ -215,7 +215,7 @@ static char *recv_frame(const struct ring_state *ring, char *frame) } /* A single TPACKET_V3 block can hold multiple frames */ -static void recv_block(struct ring_state *ring) +static bool recv_block(struct ring_state *ring) { struct tpacket_block_desc *block; char *frame; @@ -223,7 +223,7 @@ static void recv_block(struct ring_state *ring) block = (void *)(ring->mmap + ring->idx * ring_block_sz); if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) - return; + return false; frame = (char *)block; frame += block->hdr.bh1.offset_to_first_pkt; @@ -235,6 +235,8 @@ static void recv_block(struct ring_state *ring) block->hdr.bh1.block_status = TP_STATUS_KERNEL; ring->idx = (ring->idx + 1) % ring_block_nr; + + return true; } /* simple test: sleep once unconditionally and then process all rings */ @@ -245,7 +247,7 @@ static void process_rings(void) usleep(1000 * cfg_timeout_msec); for (i = 0; i < num_cpus; i++) - recv_block(&rings[i]); + do {} while (recv_block(&rings[i])); fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", frames_received - frames_nohash - frames_error, @@ -257,12 +259,12 @@ static char *setup_ring(int fd) struct tpacket_req3 req3 = {0}; void *ring; - req3.tp_retire_blk_tov = cfg_timeout_msec; + req3.tp_retire_blk_tov = cfg_timeout_msec / 8; req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; req3.tp_frame_size = 2048; req3.tp_frame_nr = 1 << 10; - req3.tp_block_nr = 2; + req3.tp_block_nr = 16; req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; req3.tp_block_size /= req3.tp_block_nr; -- cgit v1.2.3 From b9fa9bc839291020b362ab5392e5f18ba79657ac Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 6 Jan 2023 16:22:14 +0200 Subject: selftests/bpf: Verify copy_register_state() preserves parent/live fields A testcase to check that verifier.c:copy_register_state() preserves register parentage chain and livness information. Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230106142214.1040390-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/search_pruning.c | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index 68b14fdfebdb..d63fd8991b03 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -225,3 +225,39 @@ .result_unpriv = ACCEPT, .insn_processed = 15, }, +/* The test performs a conditional 64-bit write to a stack location + * fp[-8], this is followed by an unconditional 8-bit write to fp[-8], + * then data is read from fp[-8]. This sequence is unsafe. + * + * The test would be mistakenly marked as safe w/o dst register parent + * preservation in verifier.c:copy_register_state() function. + * + * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the + * checkpoint state after conditional 64-bit assignment. + */ +{ + "write tracking and register parent chain bug", + .insns = { + /* r6 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* r0 = ktime_get_ns() */ + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + /* if r0 > r6 goto +1 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_6, 1), + /* *(u64 *)(r10 - 8) = 0xdeadbeef */ + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0xdeadbeef), + /* r1 = 42 */ + BPF_MOV64_IMM(BPF_REG_1, 42), + /* *(u8 *)(r10 - 8) = r1 */ + BPF_STX_MEM(BPF_B, BPF_REG_FP, BPF_REG_1, -8), + /* r2 = *(u64 *)(r10 - 8) */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_FP, -8), + /* exit(0) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "invalid read from stack off -8+1 size 8", + .result = REJECT, +}, -- cgit v1.2.3 From 24b5308cf5ee9f52dd22f3af78a5b0cdc9d35e72 Mon Sep 17 00:00:00 2001 From: Pengfei Xu Date: Wed, 11 Jan 2023 16:15:31 +0800 Subject: selftests/filesystems: grant executable permission to run_fat_tests.sh When use tools/testing/selftests/kselftest_install.sh to make the kselftest-list.txt under tools/testing/selftests/kselftest_install. Then use tools/testing/selftests/kselftest_install/run_kselftest.sh to run all the kselftests in kselftest-list.txt, it will be blocked by case "filesystems/fat: run_fat_tests.sh" with "Warning: file run_fat_tests.sh is not executable", so grant executable permission to run_fat_tests.sh to fix this issue. Link: https://lkml.kernel.org/r/dfdbba6df8a1ab34bb1e81cd8bd7ca3f9ed5c369.1673424747.git.pengfei.xu@intel.com Fixes: dd7c9be330d8 ("selftests/filesystems: add a vfat RENAME_EXCHANGE test") Signed-off-by: Pengfei Xu Reviewed-by: Javier Martinez Canillas Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/filesystems/fat/run_fat_tests.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/filesystems/fat/run_fat_tests.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh old mode 100644 new mode 100755 -- cgit v1.2.3 From 50aa870ba2f7735f556e52d15f61cd0f359c4c0b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 22 Jan 2023 04:04:50 -0500 Subject: selftests: kvm: move declaration at the beginning of main() Placing a declaration of evt_reset is pedantically invalid according to the C standard. While GCC does not really care and only warns with -Wpedantic, clang ignores the declaration altogether with an error: x86_64/xen_shinfo_test.c:965:2: error: expected expression struct kvm_xen_hvm_attr evt_reset = { ^ x86_64/xen_shinfo_test.c:969:38: error: use of undeclared identifier evt_reset vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); ^ Reported-by: Yu Zhang Reported-by: Sean Christopherson Fixes: a79b53aaaab5 ("KVM: x86: fix deadlock for KVM_XEN_EVTCHN_RESET", 2022-12-28) Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index dae510c263b4..13c75dc18c10 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -434,6 +434,7 @@ static void *juggle_shinfo_state(void *arg) int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; + struct kvm_xen_hvm_attr evt_reset; struct kvm_vm *vm; pthread_t thread; bool verbose; @@ -962,10 +963,8 @@ int main(int argc, char *argv[]) } done: - struct kvm_xen_hvm_attr evt_reset = { - .type = KVM_XEN_ATTR_TYPE_EVTCHN, - .u.evtchn.flags = KVM_XEN_EVTCHN_RESET, - }; + evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; + evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); alarm(0); -- cgit v1.2.3 From c2c46b10d52624376322b01654095a84611c7e09 Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Wed, 11 Jan 2023 10:34:08 -0800 Subject: KVM: selftests: Make reclaim_period_ms input always be positive reclaim_period_ms used to be positive only but the commit 0001725d0f9b ("KVM: selftests: Add atoi_positive() and atoi_non_negative() for input validation") incorrectly changed it to non-negative validation. Change validation to allow only positive input. Fixes: 0001725d0f9b ("KVM: selftests: Add atoi_positive() and atoi_non_negative() for input validation") Signed-off-by: Vipin Sharma Reported-by: Ben Gardon Reviewed-by: Ben Gardon Reviewed-by: Sean Christopherson Message-Id: <20230111183408.104491-1-vipinsh@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index ea0978f22db8..251794f83719 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,7 +241,7 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi_non_negative("Reclaim period", optarg); + reclaim_period_ms = atoi_positive("Reclaim period", optarg); break; case 't': token = atoi_paranoid(optarg); -- cgit v1.2.3 From b4ea530d024ca6095fc80290075893a5b7136516 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sat, 21 Jan 2023 13:41:45 +0100 Subject: selftests/bpf: Pass BPF skeleton to sockmap_listen ops tests Following patch extends the sockmap ops tests to cover the scenario when a sockmap with attached programs holds listening sockets. Pass the BPF skeleton to sockmap ops test so that the can access and attach the BPF programs. Signed-off-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230113-sockmap-fix-v2-3-1e0ee7ac2f90@cloudflare.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/sockmap_listen.c | 55 +++++++++++++++------- 1 file changed, 37 insertions(+), 18 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 2cf0c7a3fe23..499fba8f55b9 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -30,6 +30,8 @@ #define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 +#define __always_unused __attribute__((__unused__)) + #define _FAIL(errnum, fmt...) \ ({ \ error_at_line(0, (errnum), __func__, __LINE__, fmt); \ @@ -321,7 +323,8 @@ static int socket_loopback(int family, int sotype) return socket_loopback_reuseport(family, sotype, -1); } -static void test_insert_invalid(int family, int sotype, int mapfd) +static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u32 key = 0; u64 value; @@ -338,7 +341,8 @@ static void test_insert_invalid(int family, int sotype, int mapfd) FAIL_ERRNO("map_update: expected EBADF"); } -static void test_insert_opened(int family, int sotype, int mapfd) +static void test_insert_opened(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u32 key = 0; u64 value; @@ -359,7 +363,8 @@ static void test_insert_opened(int family, int sotype, int mapfd) xclose(s); } -static void test_insert_bound(int family, int sotype, int mapfd) +static void test_insert_bound(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; socklen_t len; @@ -386,7 +391,8 @@ close: xclose(s); } -static void test_insert(int family, int sotype, int mapfd) +static void test_insert(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u64 value; u32 key; @@ -402,7 +408,8 @@ static void test_insert(int family, int sotype, int mapfd) xclose(s); } -static void test_delete_after_insert(int family, int sotype, int mapfd) +static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u64 value; u32 key; @@ -419,7 +426,8 @@ static void test_delete_after_insert(int family, int sotype, int mapfd) xclose(s); } -static void test_delete_after_close(int family, int sotype, int mapfd) +static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { int err, s; u64 value; @@ -442,7 +450,8 @@ static void test_delete_after_close(int family, int sotype, int mapfd) FAIL_ERRNO("map_delete: expected EINVAL/EINVAL"); } -static void test_lookup_after_insert(int family, int sotype, int mapfd) +static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u64 cookie, value; socklen_t len; @@ -470,7 +479,8 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd) xclose(s); } -static void test_lookup_after_delete(int family, int sotype, int mapfd) +static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { int err, s; u64 value; @@ -493,7 +503,8 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd) xclose(s); } -static void test_lookup_32_bit_value(int family, int sotype, int mapfd) +static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { u32 key, value32; int err, s; @@ -523,7 +534,8 @@ close: xclose(s); } -static void test_update_existing(int family, int sotype, int mapfd) +static void test_update_existing(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { int s1, s2; u64 value; @@ -551,7 +563,8 @@ close_s1: /* Exercise the code path where we destroy child sockets that never * got accept()'ed, aka orphans, when parent socket gets closed. */ -static void test_destroy_orphan_child(int family, int sotype, int mapfd) +static void test_destroy_orphan_child(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; socklen_t len; @@ -585,7 +598,8 @@ close_srv: /* Perform a passive open after removing listening socket from SOCKMAP * to ensure that callbacks get restored properly. */ -static void test_clone_after_delete(int family, int sotype, int mapfd) +static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; socklen_t len; @@ -621,7 +635,8 @@ close_srv: * SOCKMAP, but got accept()'ed only after the parent has been removed * from SOCKMAP, gets cloned without parent psock state or callbacks. */ -static void test_accept_after_delete(int family, int sotype, int mapfd) +static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; const u32 zero = 0; @@ -675,7 +690,8 @@ close_srv: /* Check that child socket that got created and accepted while parent * was in a SOCKMAP is cloned without parent psock state or callbacks. */ -static void test_accept_before_delete(int family, int sotype, int mapfd) +static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct sockaddr_storage addr; const u32 zero = 0, one = 1; @@ -784,7 +800,8 @@ done: return NULL; } -static void test_syn_recv_insert_delete(int family, int sotype, int mapfd) +static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused, + int family, int sotype, int mapfd) { struct connect_accept_ctx ctx = { 0 }; struct sockaddr_storage addr; @@ -847,7 +864,8 @@ static void *listen_thread(void *arg) return NULL; } -static void test_race_insert_listen(int family, int socktype, int mapfd) +static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused, + int family, int socktype, int mapfd) { struct connect_accept_ctx ctx = { 0 }; const u32 zero = 0; @@ -1473,7 +1491,8 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { const struct op_test { - void (*fn)(int family, int sotype, int mapfd); + void (*fn)(struct test_sockmap_listen *skel, + int family, int sotype, int mapfd); const char *name; int sotype; } tests[] = { @@ -1520,7 +1539,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, if (!test__start_subtest(s)) continue; - t->fn(family, sotype, map_fd); + t->fn(skel, family, sotype, map_fd); test_ops_cleanup(map); } } -- cgit v1.2.3 From c88ea16a8f892bce3bfb3f6a0d91b2bb27df8f59 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sat, 21 Jan 2023 13:41:46 +0100 Subject: selftests/bpf: Cover listener cloning with progs attached to sockmap Today we test if a child socket is cloned properly from a listening socket inside a sockmap only when there are no BPF programs attached to the map. A bug has been reported [1] for the case when sockmap has a verdict program attached. So cover this case as well to prevent regressions. [1]: https://lore.kernel.org/r/00000000000073b14905ef2e7401@google.com Signed-off-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230113-sockmap-fix-v2-4-1e0ee7ac2f90@cloudflare.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/sockmap_listen.c | 30 ++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 499fba8f55b9..567e07c19ecc 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -563,8 +563,7 @@ close_s1: /* Exercise the code path where we destroy child sockets that never * got accept()'ed, aka orphans, when parent socket gets closed. */ -static void test_destroy_orphan_child(struct test_sockmap_listen *skel __always_unused, - int family, int sotype, int mapfd) +static void do_destroy_orphan_child(int family, int sotype, int mapfd) { struct sockaddr_storage addr; socklen_t len; @@ -595,6 +594,33 @@ close_srv: xclose(s); } +static void test_destroy_orphan_child(struct test_sockmap_listen *skel, + int family, int sotype, int mapfd) +{ + int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + const struct test { + int progfd; + enum bpf_attach_type atype; + } tests[] = { + { -1, -1 }, + { msg_verdict, BPF_SK_MSG_VERDICT }, + { skb_verdict, BPF_SK_SKB_VERDICT }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + if (t->progfd != -1 && + xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0) + return; + + do_destroy_orphan_child(family, sotype, mapfd); + + if (t->progfd != -1) + xbpf_prog_detach2(t->progfd, mapfd, t->atype); + } +} + /* Perform a passive open after removing listening socket from SOCKMAP * to ensure that callbacks get restored properly. */ -- cgit v1.2.3 From a49fb7218ed84a4c5e6c56b9fd933498b9730912 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Sat, 21 Jan 2023 08:41:35 -0800 Subject: selftests: amd-pstate: Don't delete source files via Makefile Revert the portion of a recent Makefile change that incorrectly deletes source files when doing "make clean". Fixes: ba2d788aa873 ("selftests: amd-pstate: Trigger tbench benchmark and test cpus") Reported-by: Sedat Dilek Tested-by: Sedat Dilek Reviewed-by: Sedat Dilek Acked-by: Huang Rui Signed-off-by: Doug Smythies Signed-off-by: Shuah Khan --- tools/testing/selftests/amd-pstate/Makefile | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile index 5f195ee756d6..5fd1424db37d 100644 --- a/tools/testing/selftests/amd-pstate/Makefile +++ b/tools/testing/selftests/amd-pstate/Makefile @@ -7,11 +7,6 @@ all: uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -ifeq (x86,$(ARCH)) -TEST_GEN_FILES += ../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py -TEST_GEN_FILES += ../../../power/x86/intel_pstate_tracer/intel_pstate_tracer.py -endif - TEST_PROGS := run.sh TEST_FILES := basic.sh tbench.sh gitsource.sh -- cgit v1.2.3 From a6efc42a86c0c87cfe2f1c3d1f09a4c9b13ba890 Mon Sep 17 00:00:00 2001 From: Andrei Gherzan Date: Thu, 26 Jan 2023 16:55:48 +0000 Subject: selftest: net: Improve IPV6_TCLASS/IPV6_HOPLIMIT tests apparmor compatibility "tcpdump" is used to capture traffic in these tests while using a random, temporary and not suffixed file for it. This can interfere with apparmor configuration where the tool is only allowed to read from files with 'known' extensions. The MINE type application/vnd.tcpdump.pcap was registered with IANA for pcap files and .pcap is the extension that is both most common but also aligned with standard apparmor configurations. See TCPDUMP(8) for more details. This improves compatibility with standard apparmor configurations by using ".pcap" as the file extension for the tests' temporary files. Signed-off-by: Andrei Gherzan Signed-off-by: David S. Miller --- tools/testing/selftests/net/cmsg_ipv6.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index 2d89cb0ad288..330d0b1ceced 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -6,7 +6,7 @@ ksft_skip=4 NS=ns IP6=2001:db8:1::1/64 TGT6=2001:db8:1::2 -TMPF=`mktemp` +TMPF=$(mktemp --suffix ".pcap") cleanup() { -- cgit v1.2.3 From 0dd8d22a887a473f6f11abf556c4f7944ab5ef1d Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Fri, 27 Jan 2023 21:43:50 +0000 Subject: KVM: selftests: aarch64: Relax userfaultfd read vs. write checks Only Stage1 Page table walks (S1PTW) writing a PTE on an unmapped page should result in a userfaultfd write. However, the userfaultfd tests in page_fault_test wrongly assert that any S1PTW is a PTE write. Fix this by relaxing the read vs. write checks in all userfaultfd handlers. Note that this is also an attempt to focus less on KVM (and userfaultfd) behavior, and more on architectural behavior. Also note that after commit 406504c7b040 ("KVM: arm64: Fix S1PTW handling on RO memslots"), the userfaultfd fault (S1PTW with AF on an unmaped PTE page) is actually a read: the translation fault that comes before the permission fault. Signed-off-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230127214353.245671-2-ricarkol@google.com --- .../selftests/kvm/aarch64/page_fault_test.c | 83 +++++++++------------- 1 file changed, 34 insertions(+), 49 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index beb944fa6fd4..0dda58766185 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -304,7 +304,7 @@ static struct uffd_args { /* Returns true to continue the test, and false if it should be skipped. */ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, - struct uffd_args *args, bool expect_write) + struct uffd_args *args) { uint64_t addr = msg->arg.pagefault.address; uint64_t flags = msg->arg.pagefault.flags; @@ -313,7 +313,6 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, "The only expected UFFD mode is MISSING"); - ASSERT_EQ(!!(flags & UFFD_PAGEFAULT_FLAG_WRITE), expect_write); ASSERT_EQ(addr, (uint64_t)args->hva); pr_debug("uffd fault: addr=%p write=%d\n", @@ -337,19 +336,14 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, return 0; } -static int uffd_pt_write_handler(int mode, int uffd, struct uffd_msg *msg) +static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg) { - return uffd_generic_handler(mode, uffd, msg, &pt_args, true); + return uffd_generic_handler(mode, uffd, msg, &pt_args); } -static int uffd_data_write_handler(int mode, int uffd, struct uffd_msg *msg) +static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg) { - return uffd_generic_handler(mode, uffd, msg, &data_args, true); -} - -static int uffd_data_read_handler(int mode, int uffd, struct uffd_msg *msg) -{ - return uffd_generic_handler(mode, uffd, msg, &data_args, false); + return uffd_generic_handler(mode, uffd, msg, &data_args); } static void setup_uffd_args(struct userspace_mem_region *region, @@ -822,7 +816,7 @@ static void help(char *name) .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ .guest_test_check = { _CHECK(_with_af), _test_check }, \ .uffd_data_handler = _uffd_data_handler, \ - .uffd_pt_handler = uffd_pt_write_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ .expected_events = { .uffd_faults = _uffd_faults, }, \ } @@ -878,7 +872,7 @@ static void help(char *name) .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .uffd_data_handler = _uffd_data_handler, \ - .uffd_pt_handler = uffd_pt_write_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ .mmio_handler = _mmio_handler, \ .expected_events = { .mmio_exits = _mmio_exits, \ .uffd_faults = _uffd_faults }, \ @@ -892,7 +886,7 @@ static void help(char *name) .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ .guest_test = _access, \ .uffd_data_handler = _uffd_data_handler, \ - .uffd_pt_handler = uffd_pt_write_handler, \ + .uffd_pt_handler = uffd_pt_handler, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ .expected_events = { .fail_vcpu_runs = 1, \ .uffd_faults = _uffd_faults }, \ @@ -933,29 +927,27 @@ static struct test_desc tests[] = { * (S1PTW). */ TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_read_handler, uffd_pt_write_handler, 2), - /* no_af should also lead to a PT write. */ + uffd_data_handler, uffd_pt_handler, 2), TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_read_handler, uffd_pt_write_handler, 2), - /* Note how that cas invokes the read handler. */ + uffd_data_handler, uffd_pt_handler, 2), TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_read_handler, uffd_pt_write_handler, 2), + uffd_data_handler, uffd_pt_handler, 2), /* * Can't test guest_at with_af as it's IMPDEF whether the AF is set. * The S1PTW fault should still be marked as a write. */ TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_read_handler, uffd_pt_write_handler, 1), + uffd_no_handler, uffd_pt_handler, 1), TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_read_handler, uffd_pt_write_handler, 2), + uffd_data_handler, uffd_pt_handler, 2), TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_write_handler, uffd_pt_write_handler, 2), + uffd_data_handler, uffd_pt_handler, 2), TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_write_handler, uffd_pt_write_handler, 2), + uffd_data_handler, uffd_pt_handler, 2), TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_write_handler, uffd_pt_write_handler, 2), + uffd_data_handler, uffd_pt_handler, 2), TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, - uffd_data_read_handler, uffd_pt_write_handler, 2), + uffd_data_handler, uffd_pt_handler, 2), /* * Try accesses when the data and PT memory regions are both @@ -980,25 +972,25 @@ static struct test_desc tests[] = { * fault, and nothing in the dirty log. Any S1PTW should result in * a write in the dirty log and a userfaultfd write. */ - TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_read_handler, 2, + TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_handler, 2, guest_check_no_write_in_dirty_log), /* no_af should also lead to a PT write. */ - TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_read_handler, 2, + TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_handler, 2, guest_check_no_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_read_handler, + TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_handler, 2, guest_check_no_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, 0, 1, + TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1, guest_check_no_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_read_handler, 2, + TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_handler, 2, guest_check_no_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_write_handler, + TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_handler, 2, guest_check_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_read_handler, 2, + TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_handler, 2, guest_check_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_write_handler, + TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_handler, 2, guest_check_write_in_dirty_log), TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, - uffd_data_write_handler, 2, + uffd_data_handler, 2, guest_check_write_in_dirty_log), /* @@ -1051,22 +1043,15 @@ static struct test_desc tests[] = { * no userfaultfd write fault. Reads result in userfaultfd getting * triggered. */ - TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, - uffd_data_read_handler, 2), - TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, - uffd_data_read_handler, 2), - TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, - uffd_no_handler, 1), - TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, - uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1), + TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2), TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, - uffd_data_write_handler, 2), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, - uffd_data_read_handler, 2), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, - uffd_no_handler, 1), - TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, - uffd_no_handler, 1), + uffd_data_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1), { 0 } }; -- cgit v1.2.3 From 42561751ea918d8f3f54412622735e1f887cb360 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Fri, 27 Jan 2023 21:43:51 +0000 Subject: KVM: selftests: aarch64: Do not default to dirty PTE pages on all S1PTWs Only Stage1 Page table walks (S1PTW) trying to write into a PTE should result in the PTE page being dirty in the log. However, the dirty log tests in page_fault_test default to treat all S1PTW accesses as writes. Fix the relevant tests by asserting dirty pages only for S1PTW writes, which in these tests only applies to when Hardware management of the Access Flag is enabled. Signed-off-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230127214353.245671-3-ricarkol@google.com --- .../selftests/kvm/aarch64/page_fault_test.c | 93 ++++++++++++++-------- 1 file changed, 60 insertions(+), 33 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 0dda58766185..1a3bb2bd8657 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -237,6 +237,11 @@ static void guest_check_s1ptw_wr_in_dirty_log(void) GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); } +static void guest_check_no_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG); +} + static void guest_exec(void) { int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; @@ -791,7 +796,7 @@ static void help(char *name) .expected_events = { .uffd_faults = _uffd_faults, }, \ } -#define TEST_DIRTY_LOG(_access, _with_af, _test_check) \ +#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \ { \ .name = SCAT3(dirty_log, _access, _with_af), \ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ @@ -799,13 +804,12 @@ static void help(char *name) .guest_prepare = { _PREPARE(_with_af), \ _PREPARE(_access) }, \ .guest_test = _access, \ - .guest_test_check = { _CHECK(_with_af), _test_check, \ - guest_check_s1ptw_wr_in_dirty_log}, \ + .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ .expected_events = { 0 }, \ } #define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \ - _uffd_faults, _test_check) \ + _uffd_faults, _test_check, _pt_check) \ { \ .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ @@ -814,7 +818,7 @@ static void help(char *name) _PREPARE(_access) }, \ .guest_test = _access, \ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ - .guest_test_check = { _CHECK(_with_af), _test_check }, \ + .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \ .uffd_data_handler = _uffd_data_handler, \ .uffd_pt_handler = uffd_pt_handler, \ .expected_events = { .uffd_faults = _uffd_faults, }, \ @@ -953,16 +957,25 @@ static struct test_desc tests[] = { * Try accesses when the data and PT memory regions are both * tracked for dirty logging. */ - TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log), - /* no_af should also lead to a PT write. */ - TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log), - TEST_DIRTY_LOG(guest_ld_preidx, with_af, guest_check_no_write_in_dirty_log), - TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log), - TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log), - TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log), - TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log), - TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log), - TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_ld_preidx, with_af, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), /* * Access when the data and PT memory regions are both marked for @@ -972,27 +985,41 @@ static struct test_desc tests[] = { * fault, and nothing in the dirty log. Any S1PTW should result in * a write in the dirty log and a userfaultfd write. */ - TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_handler, 2, - guest_check_no_write_in_dirty_log), - /* no_af should also lead to a PT write. */ - TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_handler, 2, - guest_check_no_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_handler, - 2, guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_no_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, + uffd_data_handler, + 2, guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1, - guest_check_no_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_handler, 2, - guest_check_no_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_handler, - 2, guest_check_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_handler, 2, - guest_check_write_in_dirty_log), - TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_handler, - 2, guest_check_write_in_dirty_log), + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, + uffd_data_handler, 2, + guest_check_no_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, + uffd_data_handler, + 2, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, + uffd_data_handler, 2, + guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, + uffd_data_handler, + 2, guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, uffd_data_handler, 2, - guest_check_write_in_dirty_log), - + guest_check_write_in_dirty_log, + guest_check_s1ptw_wr_in_dirty_log), /* * Try accesses when the data memory region is marked read-only * (with KVM_MEM_READONLY). Writes with a syndrome result in an -- cgit v1.2.3 From 8b03c97fa6fd442b949b71aeb7545b970b968fe3 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Fri, 27 Jan 2023 21:43:52 +0000 Subject: KVM: selftests: aarch64: Fix check of dirty log PT write The dirty log checks are mistakenly testing the first page in the page table (PT) memory region instead of the page holding the test data page PTE. This wasn't an issue before commit 406504c7b040 ("KVM: arm64: Fix S1PTW handling on RO memslots") as all PT pages (including the first page) were treated as writes. Fix the page_fault_test dirty logging tests by checking for the right page: the one for the PTE of the data test page. Fixes: a4edf25b3e25 ("KVM: selftests: aarch64: Add dirty logging tests into page_fault_test") Signed-off-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230127214353.245671-4-ricarkol@google.com --- tools/testing/selftests/kvm/aarch64/page_fault_test.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 1a3bb2bd8657..2e2178a7d0d8 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -470,9 +470,12 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) { struct userspace_mem_region *data_region, *pt_region; bool continue_test = true; + uint64_t pte_gpa, pte_pg; data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); pt_region = vm_get_mem_region(vm, MEM_REGION_PT); + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize(); if (cmd == CMD_SKIP_TEST) continue_test = false; @@ -485,13 +488,13 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), "Missing write in dirty log"); if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) - TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, 0), + TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg), "Missing s1ptw write in dirty log"); if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), "Unexpected write in dirty log"); if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) - TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, 0), + TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg), "Unexpected s1ptw write in dirty log"); return continue_test; -- cgit v1.2.3 From 08ddbbdf0b55839ca93a12677a30a1ef24634969 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Fri, 27 Jan 2023 21:43:53 +0000 Subject: KVM: selftests: aarch64: Test read-only PT memory regions Extend the read-only memslot tests in page_fault_test to test read-only PT (Page table) memslots. Note that this was not allowed before commit 406504c7b040 ("KVM: arm64: Fix S1PTW handling on RO memslots") as all S1PTW faults were treated as writes which resulted in an (unrecoverable) exception inside the guest. Signed-off-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230127214353.245671-5-ricarkol@google.com --- tools/testing/selftests/kvm/aarch64/page_fault_test.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 2e2178a7d0d8..54680dc5887f 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -829,8 +829,9 @@ static void help(char *name) #define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ { \ - .name = SCAT3(ro_memslot, _access, _with_af), \ + .name = SCAT2(ro_memslot, _access), \ .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .mmio_handler = _mmio_handler, \ @@ -841,6 +842,7 @@ static void help(char *name) { \ .name = SCAT2(ro_memslot_no_syndrome, _access), \ .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ .guest_test = _access, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ .expected_events = { .fail_vcpu_runs = 1 }, \ @@ -849,9 +851,9 @@ static void help(char *name) #define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ _test_check) \ { \ - .name = SCAT3(ro_memslot, _access, _with_af), \ + .name = SCAT2(ro_memslot, _access), \ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ - .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .guest_test_check = { _test_check }, \ @@ -863,7 +865,7 @@ static void help(char *name) { \ .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ - .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ .guest_test = _access, \ .guest_test_check = { _test_check }, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ @@ -875,6 +877,7 @@ static void help(char *name) { \ .name = SCAT2(ro_memslot_uffd, _access), \ .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ @@ -890,6 +893,7 @@ static void help(char *name) { \ .name = SCAT2(ro_memslot_no_syndrome, _access), \ .data_memslot_flags = KVM_MEM_READONLY, \ + .pt_memslot_flags = KVM_MEM_READONLY, \ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ .guest_test = _access, \ .uffd_data_handler = _uffd_data_handler, \ @@ -1024,7 +1028,7 @@ static struct test_desc tests[] = { guest_check_write_in_dirty_log, guest_check_s1ptw_wr_in_dirty_log), /* - * Try accesses when the data memory region is marked read-only + * Access when both the PT and data regions are marked read-only * (with KVM_MEM_READONLY). Writes with a syndrome result in an * MMIO exit, writes with no syndrome (e.g., CAS) result in a * failed vcpu run, and reads/execs with and without syndroms do @@ -1040,7 +1044,7 @@ static struct test_desc tests[] = { TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), /* - * Access when both the data region is both read-only and marked + * The PT and data regions are both read-only and marked * for dirty logging at the same time. The expected result is that * for writes there should be no write in the dirty log. The * readonly handling is the same as if the memslot was not marked @@ -1065,7 +1069,7 @@ static struct test_desc tests[] = { guest_check_no_write_in_dirty_log), /* - * Access when the data region is both read-only and punched with + * The PT and data regions are both read-only and punched with * holes tracked with userfaultfd. The expected result is the * union of both userfaultfd and read-only behaviors. For example, * write accesses result in a userfaultfd write fault and an MMIO -- cgit v1.2.3 From e5ae8803847b80fe9d744a3174abe2b7bfed222a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 31 Jan 2023 10:48:03 -0500 Subject: cgroup/cpuset: Fix wrong check in update_parent_subparts_cpumask() It was found that the check to see if a partition could use up all the cpus from the parent cpuset in update_parent_subparts_cpumask() was incorrect. As a result, it is possible to leave parent with no effective cpu left even if there are tasks in the parent cpuset. This can lead to system panic as reported in [1]. Fix this probem by updating the check to fail the enabling the partition if parent's effective_cpus is a subset of the child's cpus_allowed. Also record the error code when an error happens in update_prstate() and add a test case where parent partition and child have the same cpu list and parent has task. Enabling partition in the child will fail in this case. [1] https://www.spinics.net/lists/cgroups/msg36254.html Fixes: f0af1bfc27b5 ("cgroup/cpuset: Relax constraints to partition & cpus changes") Cc: stable@vger.kernel.org # v6.1 Reported-by: Srinivas Pandruvada Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 3 ++- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a29c0b13706b..205dc9edcaa9 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1346,7 +1346,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, * A parent can be left with no CPU as long as there is no * task directly associated with the parent partition. */ - if (!cpumask_intersects(cs->cpus_allowed, parent->effective_cpus) && + if (cpumask_subset(parent->effective_cpus, cs->cpus_allowed) && partition_is_populated(parent, cs)) return PERR_NOCPUS; @@ -2324,6 +2324,7 @@ out: new_prs = -new_prs; spin_lock_irq(&callback_lock); cs->partition_root_state = new_prs; + WRITE_ONCE(cs->prs_err, err); spin_unlock_irq(&callback_lock); /* * Update child cpusets, if present. diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 186e1c26867e..75c100de90ff 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -268,6 +268,7 @@ TEST_MATRIX=( # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. " S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" + " S+ C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" " S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" " S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" -- cgit v1.2.3 From c03c80e3a03ffb4f790901d60797e9810539d946 Mon Sep 17 00:00:00 2001 From: Andrei Gherzan Date: Wed, 1 Feb 2023 00:16:10 +0000 Subject: selftests: net: udpgso_bench_rx: Fix 'used uninitialized' compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change fixes the following compiler warning: /usr/include/x86_64-linux-gnu/bits/error.h:40:5: warning: ‘gso_size’ may be used uninitialized [-Wmaybe-uninitialized] 40 | __error_noreturn (__status, __errnum, __format, __va_arg_pack ()); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ udpgso_bench_rx.c: In function ‘main’: udpgso_bench_rx.c:253:23: note: ‘gso_size’ was declared here 253 | int ret, len, gso_size, budget = 256; Fixes: 3327a9c46352 ("selftests: add functionals test for UDP GRO") Signed-off-by: Andrei Gherzan Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230201001612.515730-1-andrei.gherzan@canonical.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/udpgso_bench_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 6a193425c367..d0895bd1933f 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -250,7 +250,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size) static void do_flush_udp(int fd) { static char rbuf[ETH_MAX_MTU]; - int ret, len, gso_size, budget = 256; + int ret, len, gso_size = 0, budget = 256; len = cfg_read_all ? sizeof(rbuf) : 0; while (budget--) { -- cgit v1.2.3 From db9b47ee9f5f375ab0c5daeb20321c75b4fa657d Mon Sep 17 00:00:00 2001 From: Andrei Gherzan Date: Wed, 1 Feb 2023 00:16:12 +0000 Subject: selftests: net: udpgso_bench_rx/tx: Stop when wrong CLI args are provided Leaving unrecognized arguments buried in the output, can easily hide a CLI/script typo. Avoid this by exiting when wrong arguments are provided to the udpgso_bench test programs. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Signed-off-by: Andrei Gherzan Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230201001612.515730-2-andrei.gherzan@canonical.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/udpgso_bench_rx.c | 2 ++ tools/testing/selftests/net/udpgso_bench_tx.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index d0895bd1933f..4058c7451e70 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -336,6 +336,8 @@ static void parse_opts(int argc, char **argv) cfg_verify = true; cfg_read_all = true; break; + default: + exit(1); } } diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index f1fdaa270291..b47b5c32039f 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -490,6 +490,8 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + default: + exit(1); } } -- cgit v1.2.3 From dafe93b9ee21028d625dce347118b82659652eff Mon Sep 17 00:00:00 2001 From: Andrei Gherzan Date: Wed, 1 Feb 2023 00:16:14 +0000 Subject: selftests: net: udpgso_bench: Fix racing bug between the rx/tx programs "udpgro_bench.sh" invokes udpgso_bench_rx/udpgso_bench_tx programs subsequently and while doing so, there is a chance that the rx one is not ready to accept socket connections. This racing bug could fail the test with at least one of the following: ./udpgso_bench_tx: connect: Connection refused ./udpgso_bench_tx: sendmsg: Connection refused ./udpgso_bench_tx: write: Connection refused This change addresses this by making udpgro_bench.sh wait for the rx program to be ready before firing off the tx one - up to a 10s timeout. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Signed-off-by: Andrei Gherzan Cc: Paolo Abeni Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230201001612.515730-3-andrei.gherzan@canonical.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/udpgso_bench.sh | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index dc932fd65363..640bc43452fa 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m' readonly YELLOW='\033[0;33m' readonly RED='\033[0;31m' readonly NC='\033[0m' # No Color +readonly TESTPORT=8000 readonly KSFT_PASS=0 readonly KSFT_FAIL=1 @@ -56,11 +57,26 @@ trap wake_children EXIT run_one() { local -r args=$@ + local nr_socks=0 + local i=0 + local -r timeout=10 + + ./udpgso_bench_rx -p "$TESTPORT" & + ./udpgso_bench_rx -p "$TESTPORT" -t & + + # Wait for the above test program to get ready to receive connections. + while [ "$i" -lt "$timeout" ]; do + nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")" + [ "$nr_socks" -eq 2 ] && break + i=$((i + 1)) + sleep 1 + done + if [ "$nr_socks" -ne 2 ]; then + echo "timed out while waiting for udpgso_bench_rx" + exit 1 + fi - ./udpgso_bench_rx & - ./udpgso_bench_rx -t & - - ./udpgso_bench_tx ${args} + ./udpgso_bench_tx -p "$TESTPORT" ${args} } run_in_netns() { -- cgit v1.2.3 From 329c9cd769c2e306957df031efff656c40922c76 Mon Sep 17 00:00:00 2001 From: Andrei Gherzan Date: Wed, 1 Feb 2023 00:16:16 +0000 Subject: selftests: net: udpgso_bench_tx: Cater for pending datagrams zerocopy benchmarking The test tool can check that the zerocopy number of completions value is valid taking into consideration the number of datagram send calls. This can catch the system into a state where the datagrams are still in the system (for example in a qdisk, waiting for the network interface to return a completion notification, etc). This change adds a retry logic of computing the number of completions up to a configurable (via CLI) timeout (default: 2 seconds). Fixes: 79ebc3c26010 ("net/udpgso_bench_tx: options to exercise TX CMSG") Signed-off-by: Andrei Gherzan Cc: Willem de Bruijn Cc: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230201001612.515730-4-andrei.gherzan@canonical.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/udpgso_bench_tx.c | 34 +++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index b47b5c32039f..477392715a9a 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -62,6 +62,7 @@ static int cfg_payload_len = (1472 * 42); static int cfg_port = 8000; static int cfg_runtime_ms = -1; static bool cfg_poll; +static int cfg_poll_loop_timeout_ms = 2000; static bool cfg_segment; static bool cfg_sendmmsg; static bool cfg_tcp; @@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd) } } -static void flush_errqueue(int fd, const bool do_poll) +static void flush_errqueue(int fd, const bool do_poll, + unsigned long poll_timeout, const bool poll_err) { if (do_poll) { struct pollfd fds = {0}; int ret; fds.fd = fd; - ret = poll(&fds, 1, 500); + ret = poll(&fds, 1, poll_timeout); if (ret == 0) { - if (cfg_verbose) + if ((cfg_verbose) && (poll_err)) fprintf(stderr, "poll timeout\n"); } else if (ret < 0) { error(1, errno, "poll"); @@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll) flush_errqueue_recv(fd); } +static void flush_errqueue_retry(int fd, unsigned long num_sends) +{ + unsigned long tnow, tstop; + bool first_try = true; + + tnow = gettimeofday_ms(); + tstop = tnow + cfg_poll_loop_timeout_ms; + do { + flush_errqueue(fd, true, tstop - tnow, first_try); + first_try = false; + tnow = gettimeofday_ms(); + } while ((stat_zcopies != num_sends) && (tnow < tstop)); +} + static int send_tcp(int fd, char *data) { int ret, done = 0, count = 0; @@ -413,7 +429,8 @@ static int send_udp_segment(int fd, char *data) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", + error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] " + "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } @@ -423,7 +440,7 @@ static void parse_opts(int argc, char **argv) int max_len, hdrlen; int c; - while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) { + while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -452,6 +469,9 @@ static void parse_opts(int argc, char **argv) case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; break; + case 'L': + cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000; + break; case 'm': cfg_sendmmsg = true; break; @@ -679,7 +699,7 @@ int main(int argc, char **argv) num_sends += send_udp(fd, buf[i]); num_msgs++; if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp) - flush_errqueue(fd, cfg_poll); + flush_errqueue(fd, cfg_poll, 500, true); if (cfg_msg_nr && num_msgs >= cfg_msg_nr) break; @@ -698,7 +718,7 @@ int main(int argc, char **argv) } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); if (cfg_zerocopy || cfg_tx_tstamp) - flush_errqueue(fd, true); + flush_errqueue_retry(fd, num_sends); if (close(fd)) error(1, errno, "close"); -- cgit v1.2.3 From 647037adcad00f2bab8828d3d41cd0553d41f3bd Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Tue, 7 Feb 2023 08:21:51 +0000 Subject: Revert "mm: Always release pages to the buddy allocator in memblock_free_late()." This reverts commit 115d9d77bb0f9152c60b6e8646369fa7f6167593. The pages being freed by memblock_free_late() have already been initialized, but if they are in the deferred init range, __free_one_page() might access nearby uninitialized pages when trying to coalesce buddies. This can, for example, trigger this BUG: BUG: unable to handle page fault for address: ffffe964c02580c8 RIP: 0010:__list_del_entry_valid+0x3f/0x70 __free_one_page+0x139/0x410 __free_pages_ok+0x21d/0x450 memblock_free_late+0x8c/0xb9 efi_free_boot_services+0x16b/0x25c efi_enter_virtual_mode+0x403/0x446 start_kernel+0x678/0x714 secondary_startup_64_no_verify+0xd2/0xdb A proper fix will be more involved so revert this change for the time being. Fixes: 115d9d77bb0f ("mm: Always release pages to the buddy allocator in memblock_free_late().") Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/20230207082151.1303-1-dev@aaront.org Signed-off-by: Mike Rapoport (IBM) --- mm/memblock.c | 8 +------- tools/testing/memblock/internal.h | 4 ---- 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'tools/testing') diff --git a/mm/memblock.c b/mm/memblock.c index 685e30e6d27c..d036c7861310 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1640,13 +1640,7 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size) end = PFN_DOWN(base + size); for (; cursor < end; cursor++) { - /* - * Reserved pages are always initialized by the end of - * memblock_free_all() (by memmap_init() and, if deferred - * initialization is enabled, memmap_init_reserved_pages()), so - * these pages can be released directly to the buddy allocator. - */ - __free_pages_core(pfn_to_page(cursor), 0); + memblock_free_pages(pfn_to_page(cursor), cursor, 0); totalram_pages_inc(); } } diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index 85973e55489e..fdb7f5db7308 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,10 +15,6 @@ bool mirrored_kernelcore = false; struct page {}; -void __free_pages_core(struct page *page, unsigned int order) -{ -} - void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { -- cgit v1.2.3 From bbb253b206b9c417928a6c827d038e457f3012e9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 5 Feb 2023 21:24:09 +0200 Subject: selftests: ocelot: tc_flower_chains: make test_vlan_ingress_modify() more comprehensive We have two IS1 filters of the OCELOT_VCAP_KEY_ANY key type (the one with "action vlan pop" and the one with "action vlan modify") and one of the OCELOT_VCAP_KEY_IPV4 key type (the one with "action skbedit priority"). But we have no IS1 filter with the OCELOT_VCAP_KEY_ETYPE key type, and there was an uncaught breakage there. To increase test coverage, convert one of the OCELOT_VCAP_KEY_ANY filters to OCELOT_VCAP_KEY_ETYPE, by making the filter also match on the MAC SA of the traffic sent by mausezahn, $h1_mac. Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230205192409.1796428-2-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index 9c79bbcce5a8..aff0a59f92d9 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -246,7 +246,7 @@ test_vlan_ingress_modify() bridge vlan add dev $swp2 vid 300 tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \ - protocol 802.1Q flower skip_sw vlan_id 200 \ + protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \ action vlan modify id 300 \ action goto chain $(IS2 0 0) -- cgit v1.2.3 From a635a8c3df66ab68dc088c08a4e9e955e22c0e64 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Feb 2023 14:04:17 +0100 Subject: selftests: mptcp: allow more slack for slow test-case A test-case is frequently failing on some extremely slow VMs. The mptcp transfer completes before the script is able to do all the required PM manipulation. Address the issue in the simplest possible way, making the transfer even more slow. Additionally dump more info in case of failures, to help debugging similar problems in the future and init dump_stats var. Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/323 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index d11d3d566608..f8a969300ef4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1694,6 +1694,7 @@ chk_subflow_nr() local subflow_nr=$3 local cnt1 local cnt2 + local dump_stats if [ -n "${need_title}" ]; then printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}" @@ -1711,7 +1712,12 @@ chk_subflow_nr() echo "[ ok ]" fi - [ "${dump_stats}" = 1 ] && ( ss -N $ns1 -tOni ; ss -N $ns1 -tOni | grep token; ip -n $ns1 mptcp endpoint ) + if [ "${dump_stats}" = 1 ]; then + ss -N $ns1 -tOni + ss -N $ns1 -tOni | grep token + ip -n $ns1 mptcp endpoint + dump_stats + fi } chk_link_usage() @@ -3069,7 +3075,7 @@ endpoint_tests() pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 4 0 0 slow & + run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 & wait_mpj $ns2 pm_nl_del_endpoint $ns2 2 10.0.2.2 -- cgit v1.2.3 From 070d6dafacbaa9d1f2e4e3edc263853d194af15e Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 7 Feb 2023 14:04:18 +0100 Subject: selftests: mptcp: stop tests earlier These 'endpoint' tests from 'mptcp_join.sh' selftest start a transfer in the background and check the status during this transfer. Once the expected events have been recorded, there is no reason to wait for the data transfer to finish. It can be stopped earlier to reduce the execution time by more than half. For these tests, the exchanged data were not verified. Errors, if any, were ignored but that's fine, plenty of other tests are looking at that. It is then OK to mute stderr now that we are sure errors will be printed (and still ignored) because the transfer is stopped before the end. Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f8a969300ef4..079f8f46849d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -498,6 +498,12 @@ kill_events_pids() kill_wait $evts_ns2_pid } +kill_tests_wait() +{ + kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) + wait +} + pm_nl_set_limits() { local ns=$1 @@ -3055,7 +3061,7 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null & wait_mpj $ns1 pm_nl_check_endpoint 1 "creation" \ @@ -3068,14 +3074,14 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint 0 "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - wait + kill_tests_wait fi if reset "delete and re-add"; then pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 & + run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null & wait_mpj $ns2 pm_nl_del_endpoint $ns2 2 10.0.2.2 @@ -3085,7 +3091,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "" "after re-add" 2 - wait + kill_tests_wait fi } -- cgit v1.2.3 From b963d9d5b9437a6b99504987310f98537c9e77d4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 7 Feb 2023 16:18:19 +0200 Subject: selftests: Fix failing VXLAN VNI filtering test iproute2 does not recognize the "group6" and "remote6" keywords. Fix by using "group" and "remote" instead. Before: # ./test_vxlan_vnifiltering.sh [...] Tests passed: 25 Tests failed: 2 After: # ./test_vxlan_vnifiltering.sh [...] Tests passed: 27 Tests failed: 0 Fixes: 3edf5f66c12a ("selftests: add new tests for vxlan vnifiltering") Signed-off-by: Ido Schimmel Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230207141819.256689-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/test_vxlan_vnifiltering.sh | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 704997ffc244..8c3ac0a72545 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -293,19 +293,11 @@ setup-vm() { elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then # Add per vni group config with 'bridge vni' api if [ -n "$group" ]; then - if [ "$family" == "v4" ]; then - if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group - else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group - fi - else - if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group6 $group - else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote6 $group - fi - fi + if [ $mcast -eq 1 ]; then + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group + else + bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group + fi fi fi done -- cgit v1.2.3 From 3a082086aa200852545cf15159213582c0c80eba Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 8 Feb 2023 11:21:10 +0800 Subject: selftests: forwarding: lib: quote the sysctl values When set/restore sysctl value, we should quote the value as some keys may have multi values, e.g. net.ipv4.ping_group_range Fixes: f5ae57784ba8 ("selftests: forwarding: lib: Add sysctl_set(), sysctl_restore()") Signed-off-by: Hangbin Liu Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20230208032110.879205-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/forwarding/lib.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 1c4f866de7d7..3d8e4ebda1b6 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -914,14 +914,14 @@ sysctl_set() local value=$1; shift SYSCTL_ORIG[$key]=$(sysctl -n $key) - sysctl -qw $key=$value + sysctl -qw $key="$value" } sysctl_restore() { local key=$1; shift - sysctl -qw $key=${SYSCTL_ORIG["$key"]} + sysctl -qw $key="${SYSCTL_ORIG[$key]}" } forwarding_enable() -- cgit v1.2.3 From c21a20d9d102ab809a992a6b056abbec4cd4c1cd Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Feb 2023 18:14:07 +0100 Subject: selftests: fib_rule_tests: Test UDP and TCP connections with DSCP rules. Add the fib_rule6_send and fib_rule4_send tests to verify that DSCP values are properly taken into account when UDP or TCP sockets try to connect(). Tests are done with nettest, which needs a new option to specify the DS Field value of the socket being tested. This new option is named '-Q', in reference to the similar option used by ping. Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 128 +++++++++++++++++++++++++- tools/testing/selftests/net/nettest.c | 51 +++++++++- 2 files changed, 177 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index c245476fa29d..63c3eaec8d30 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -10,8 +10,10 @@ ret=0 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} IP="ip -netns testns" +IP_PEER="ip -netns peerns" RTABLE=100 +RTABLE_PEER=101 GW_IP4=192.51.100.2 SRC_IP=192.51.100.3 GW_IP6=2001:db8:1::2 @@ -20,7 +22,9 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 -TESTS="fib_rule6 fib_rule4" +TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect" + +SELFTEST_PATH="" log_test() { @@ -52,6 +56,31 @@ log_section() echo "######################################################################" } +check_nettest() +{ + if which nettest > /dev/null 2>&1; then + return 0 + fi + + # Add the selftest directory to PATH if not already done + if [ "${SELFTEST_PATH}" = "" ]; then + SELFTEST_PATH="$(dirname $0)" + PATH="${PATH}:${SELFTEST_PATH}" + + # Now retry with the new path + if which nettest > /dev/null 2>&1; then + return 0 + fi + + if [ "${ret}" -eq 0 ]; then + ret="${ksft_skip}" + fi + echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" + fi + + return 1 +} + setup() { set -e @@ -72,6 +101,39 @@ cleanup() ip netns del testns } +setup_peer() +{ + set -e + + ip netns add peerns + $IP_PEER link set dev lo up + + ip link add name veth0 netns testns type veth \ + peer name veth1 netns peerns + $IP link set dev veth0 up + $IP_PEER link set dev veth1 up + + $IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0 + $IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1 + + $IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad + $IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad + + $IP_PEER address add 198.51.100.11/32 dev lo + $IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11 + + $IP_PEER address add 2001:db8::1:11/128 dev lo + $IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11 + + set +e +} + +cleanup_peer() +{ + $IP link del dev veth0 + ip netns del peerns +} + fib_check_iproute_support() { ip rule help 2>&1 | grep -q $1 @@ -190,6 +252,37 @@ fib_rule6_test() fi } +# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly +# taken into account when connecting the socket and when sending packets. +fib_rule6_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -6 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -6 -B -t 5 -N testns -O peerns -U -D \ + -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + fib_rule4_del() { $IP rule del $1 @@ -296,6 +389,37 @@ fib_rule4_test() fi } +# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken +# into account when connecting the socket and when sending packets. +fib_rule4_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -4 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + run_fibrule_tests() { log_section "IPv4 fib rule" @@ -345,6 +469,8 @@ do case $t in fib_rule6_test|fib_rule6) fib_rule6_test;; fib_rule4_test|fib_rule4) fib_rule4_test;; + fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; + fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; help) echo "Test names: $TESTS"; exit 0;; diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 7900fa98eccb..ee9a72982705 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -87,6 +87,7 @@ struct sock_args { int use_setsockopt; int use_freebind; int use_cmsg; + uint8_t dsfield; const char *dev; const char *server_dev; int ifindex; @@ -580,6 +581,36 @@ static int set_reuseaddr(int sd) return rc; } +static int set_dsfield(int sd, int version, int dsfield) +{ + if (!dsfield) + return 0; + + switch (version) { + case AF_INET: + if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IP_TOS)"); + return -1; + } + break; + + case AF_INET6: + if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IPV6_TCLASS)"); + return -1; + } + break; + + default: + log_error("Invalid address family\n"); + return -1; + } + + return 0; +} + static int str_to_uint(const char *str, int min, int max, unsigned int *value) { int number; @@ -1317,6 +1348,9 @@ static int msock_init(struct sock_args *args, int server) (char *)&one, sizeof(one)) < 0) log_err_errno("Setting SO_BROADCAST error"); + if (set_dsfield(sd, AF_INET, args->dsfield) != 0) + goto out_err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto out_err; else if (args->use_setsockopt && @@ -1445,6 +1479,9 @@ static int lsock_init(struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1658,6 +1695,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1862,7 +1902,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" +#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" #define OPT_FORCE_BIND_KEY_IFINDEX 1001 #define OPT_NO_BIND_KEY_IFINDEX 1002 @@ -1893,6 +1933,8 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -Q dsfield DS Field value of the socket (the IP_TOS or\n" + " IPV6_TCLASS socket option)\n" " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" @@ -1971,6 +2013,13 @@ int main(int argc, char *argv[]) args.has_local_ip = 1; args.client_local_addr_str = optarg; break; + case 'Q': + if (str_to_uint(optarg, 0, 255, &tmp) != 0) { + fprintf(stderr, "Invalid DS Field\n"); + return 1; + } + args.dsfield = tmp; + break; case 'p': if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { fprintf(stderr, "Invalid port\n"); -- cgit v1.2.3