diff options
Diffstat (limited to 'tools')
45 files changed, 4287 insertions, 159 deletions
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f45bacbaa3d5..e43e1896cb4b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -215,7 +215,7 @@ define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' endef install_lib: all_cmd diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b5dbd5adc0e8..5e2809d685bf 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -464,7 +464,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) return err; case BTF_KIND_ARRAY: - return btf_dump_order_type(d, btf_array(t)->type, through_ptr); + return btf_dump_order_type(d, btf_array(t)->type, false); case BTF_KIND_STRUCT: case BTF_KIND_UNION: { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 058b643cbcb1..c0fd2c718a7d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1185,7 +1185,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); - return -LIBBPF_ERRNO__FORMAT; + err = -LIBBPF_ERRNO__FORMAT; + goto errout; } /* Old LLVM set e_machine to EM_NONE */ diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 4dd73de00b6f..d2cb28e9ef52 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -40,7 +40,7 @@ static int libbpf_netlink_open(__u32 *nl_pid) memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; - sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); if (sock < 0) return -errno; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 068cdb41f76f..5e5388a38e2a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2442,6 +2442,9 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state) if (update_cfi_state(insn, &state->cfi, op)) return 1; + if (!insn->alt_group) + continue; + if (op->dest.type == OP_DEST_PUSHF) { if (!state->uaccess_stack) { state->uaccess_stack = 1; diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config index a7f0603d33f6..690870043ac0 100644 --- a/tools/testing/kunit/configs/broken_on_uml.config +++ b/tools/testing/kunit/configs/broken_on_uml.config @@ -40,3 +40,5 @@ # CONFIG_RESET_BRCMSTB_RESCAL is not set # CONFIG_RESET_INTEL_GW is not set # CONFIG_ADI_AXI_ADC is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_PAGE_POISONING is not set diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 0b550cbd667d..1e2683dcc0e7 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -13,7 +13,7 @@ from typing import List, Set CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' -KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value']) +KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value']) class KconfigEntry(KconfigEntryBase): diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index b2282be6f938..612d3899614a 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -332,5 +332,5 @@ int main(void) ksft_print_cnts(); - return 0; + return ret; } diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 9210691aa998..e3e08d9c7020 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -284,16 +284,28 @@ endfunction // Set up test pattern in the FFR // x0: pid // x2: generation +// +// We need to generate a canonical FFR value, which consists of a number of +// low "1" bits, followed by a number of zeros. This gives us 17 unique values +// per 16 bits of FFR, so we create a 4 bit signature out of the PID and +// generation, and use that as the initial number of ones in the pattern. +// We fill the upper lanes of FFR with zeros. // Beware: corrupts P0. function setup_ffr mov x4, x30 - bl pattern + and w0, w0, #0x3 + bfi w0, w2, #2, #2 + mov w1, #1 + lsl w1, w1, w0 + sub w1, w1, #1 + ldr x0, =ffrref - ldr x1, =scratch - rdvl x2, #1 - lsr x2, x2, #3 - bl memcpy + strh w1, [x0], 2 + rdvl x1, #1 + lsr x1, x1, #3 + sub x1, x1, #2 + bl memclr mov x0, #0 ldr x1, =ffrref diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 36af1c138faf..b62a39315336 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -128,6 +128,8 @@ static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex) test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu); test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu); test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len, mtu); + test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len_exceed, mtu); cleanup: test_check_mtu__destroy(skel); @@ -187,6 +189,8 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu); + test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu); cleanup: test_check_mtu__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c new file mode 100644 index 000000000000..6c4d42a2386f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <time.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include "fexit_sleep.skel.h" + +static int do_sleep(void *skel) +{ + struct fexit_sleep *fexit_skel = skel; + struct timespec ts1 = { .tv_nsec = 1 }; + struct timespec ts2 = { .tv_sec = 10 }; + + fexit_skel->bss->pid = getpid(); + (void)syscall(__NR_nanosleep, &ts1, NULL); + (void)syscall(__NR_nanosleep, &ts2, NULL); + return 0; +} + +#define STACK_SIZE (1024 * 1024) +static char child_stack[STACK_SIZE]; + +void test_fexit_sleep(void) +{ + struct fexit_sleep *fexit_skel = NULL; + int wstatus, duration = 0; + pid_t cpid; + int err, fexit_cnt; + + fexit_skel = fexit_sleep__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto cleanup; + + err = fexit_sleep__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto cleanup; + + cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel); + if (CHECK(cpid == -1, "clone", strerror(errno))) + goto cleanup; + + /* wait until first sys_nanosleep ends and second sys_nanosleep starts */ + while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2); + fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt); + if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt)) + goto cleanup; + + /* close progs and detach them. That will trigger two nop5->jmp5 rewrites + * in the trampolines to skip nanosleep_fexit prog. + * The nanosleep_fentry prog will get detached first. + * The nanosleep_fexit prog will get detached second. + * Detaching will trigger freeing of both progs JITed images. + * There will be two dying bpf_tramp_image-s, but only the initial + * bpf_tramp_image (with both _fentry and _fexit progs will be stuck + * waiting for percpu_ref_kill to confirm). The other one + * will be freed quickly. + */ + close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry)); + close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit)); + fexit_sleep__detach(fexit_skel); + + /* kill the thread to unwind sys_nanosleep stack through the trampoline */ + kill(cpid, 9); + + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno))) + goto cleanup; + if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed")) + goto cleanup; + + /* The bypassed nanosleep_fexit prog shouldn't have executed. + * Unlike progs the maps were not freed and directly accessible. + */ + fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt); + if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt)) + goto cleanup; + +cleanup: + fexit_sleep__destroy(fexit_skel); +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 12b40dc81e14..8aaa24a00322 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -174,6 +174,12 @@ struct struct_in_struct { }; }; +struct struct_in_array {}; + +struct struct_in_array_typed {}; + +typedef struct struct_in_array_typed struct_in_array_t[2]; + struct struct_with_embedded_stuff { int a; struct { @@ -203,6 +209,8 @@ struct struct_with_embedded_stuff { } r[5]; struct struct_in_struct s[10]; int t[11]; + struct struct_in_array (*u)[2]; + struct_in_array_t *v; }; struct float_struct { diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c new file mode 100644 index 000000000000..03a672d76353 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char LICENSE[] SEC("license") = "GPL"; + +int pid = 0; +int fentry_cnt = 0; +int fexit_cnt = 0; + +SEC("fentry/__x64_sys_nanosleep") +int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) +{ + if ((int)bpf_get_current_pid_tgid() != pid) + return 0; + + fentry_cnt++; + return 0; +} + +SEC("fexit/__x64_sys_nanosleep") +int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) +{ + if ((int)bpf_get_current_pid_tgid() != pid) + return 0; + + fexit_cnt++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c index b7787b43f9db..c4a9bae96e75 100644 --- a/tools/testing/selftests/bpf/progs/test_check_mtu.c +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -105,6 +105,54 @@ int xdp_minus_delta(struct xdp_md *ctx) return retval; } +SEC("xdp") +int xdp_input_len(struct xdp_md *ctx) +{ + int retval = XDP_PASS; /* Expected retval on successful test */ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 ifindex = GLOBAL_USER_IFINDEX; + __u32 data_len = data_end - data; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input len is L3, like MTU and iph->tot_len. + * Remember XDP data_len is L2. + */ + __u32 mtu_len = data_len - ETH_HLEN; + + if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0)) + retval = XDP_ABORTED; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("xdp") +int xdp_input_len_exceed(struct xdp_md *ctx) +{ + int retval = XDP_ABORTED; /* Fail */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + int err; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input length value is L3 size like MTU. + */ + __u32 mtu_len = GLOBAL_USER_MTU; + + mtu_len += 1; /* Exceed with 1 */ + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0); + if (err == BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = XDP_PASS ; /* Success in exceeding MTU check */ + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + SEC("classifier") int tc_use_helper(struct __sk_buff *ctx) { @@ -196,3 +244,47 @@ int tc_minus_delta(struct __sk_buff *ctx) global_bpf_mtu_xdp = mtu_len; return retval; } + +SEC("classifier") +int tc_input_len(struct __sk_buff *ctx) +{ + int retval = BPF_OK; /* Expected retval on successful test */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input length value is L3 size. + */ + __u32 mtu_len = GLOBAL_USER_MTU; + + if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0)) + retval = BPF_DROP; + + global_bpf_mtu_xdp = mtu_len; + return retval; +} + +SEC("classifier") +int tc_input_len_exceed(struct __sk_buff *ctx) +{ + int retval = BPF_DROP; /* Fail */ + __u32 ifindex = GLOBAL_USER_IFINDEX; + int err; + + /* API allow user give length to check as input via mtu_len param, + * resulting MTU value is still output in mtu_len param after call. + * + * Input length value is L3 size like MTU. + */ + __u32 mtu_len = GLOBAL_USER_MTU; + + mtu_len += 1; /* Exceed with 1 */ + + err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0); + if (err == BPF_MTU_CHK_RET_FRAG_NEEDED) + retval = BPF_OK; /* Success in exceeding MTU check */ + + global_bpf_mtu_xdp = mtu_len; + return retval; +} diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 9afe947cfae9..ba6eadfec565 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -508,10 +508,8 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb) } ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } + if (ret < 0) + gopt.opt_class = 0; bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4, gopt.opt_class); diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c index 1fd07a4f27ac..c162498a64fc 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c +++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c @@ -6,8 +6,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 2", @@ -20,6 +21,8 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 1, }, @@ -31,8 +34,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 4", @@ -45,6 +49,8 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -55,8 +61,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 6", @@ -67,8 +74,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 7", @@ -80,8 +88,9 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", .errstr = "dereference of modified ctx ptr", + .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -94,8 +103,9 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", .errstr = "dereference of modified ctx ptr", + .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -106,8 +116,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 10", @@ -119,6 +130,6 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, .errstr = "math between ctx pointer and register with unbounded min value is not allowed", + .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index b117bdd3806d..6f610cfddae5 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -75,6 +75,8 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_16b = { 4 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", .result = ACCEPT, }, { @@ -91,5 +93,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_16b = { 4 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index b018ad71e0a8..3e32400c4b44 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -497,7 +497,7 @@ .result = ACCEPT, }, { - "unpriv: adding of fp", + "unpriv: adding of fp, reg", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_1, 0), @@ -505,6 +505,19 @@ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: adding of fp, imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", .result_unpriv = REJECT, .result = ACCEPT, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index ed4e76b24649..feb91266db39 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -169,7 +169,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps or paths", + .errstr_unpriv = "R2 tried to add from different maps, paths, or prohibited types", .retval = 0, }, { @@ -517,6 +517,27 @@ .retval = 0xabcdef12, }, { + "map access: value_ptr += N, value_ptr -= N known scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV32_IMM(BPF_REG_1, 0x12345678), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0x12345678, +}, +{ "map access: unknown scalar += value_ptr, 1", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 1fedfc9da434..42d44e27802c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -446,6 +446,35 @@ __invalid_nexthop_test() log_test "Unresolved neigh: nexthop does not exist: $desc" } +__invalid_nexthop_bucket_test() +{ + local desc=$1; shift + local dip=$1; shift + local via_add=$1; shift + local trap_name="unresolved_neigh" + + RET=0 + + # Check that route to nexthop that does not exist triggers + # unresolved_neigh + ip nexthop add id 1 via $via_add dev $rp2 + ip nexthop add id 10 group 1 type resilient buckets 32 + ip route add $dip nhid 10 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + ip route del $dip nhid 10 + ip nexthop del id 10 + ip nexthop del id 1 + log_test "Unresolved neigh: nexthop bucket does not exist: $desc" +} + unresolved_neigh_test() { __host_miss_test "IPv4" 198.51.100.1 @@ -453,6 +482,8 @@ unresolved_neigh_test() __invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4 __invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \ 2001:db8:2::4 + __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4 + __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4 } vrf_without_routes_create() diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index ed346da5d3cb..a217f9f6775b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -33,6 +33,7 @@ ALL_TESTS=" nexthop_obj_invalid_test nexthop_obj_offload_test nexthop_obj_group_offload_test + nexthop_obj_bucket_offload_test nexthop_obj_blackhole_offload_test nexthop_obj_route_offload_test devlink_reload_test @@ -739,11 +740,28 @@ nexthop_obj_invalid_test() ip nexthop add id 1 dev $swp1 ip nexthop add id 2 dev $swp1 + ip nexthop add id 3 via 192.0.2.3 dev $swp1 ip nexthop add id 10 group 1/2 check_fail $? "managed to configure a nexthop group with device-only nexthops when should not" + ip nexthop add id 10 group 3 type resilient buckets 7 + check_fail $? "managed to configure a too small resilient nexthop group when should not" + + ip nexthop add id 10 group 3 type resilient buckets 129 + check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not" + + ip nexthop add id 10 group 1/2 type resilient buckets 32 + check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not" + + ip nexthop add id 10 group 3 type resilient buckets 32 + check_err $? "failed to configure a valid resilient nexthop group" + ip nexthop replace id 3 dev $swp1 + check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not" + log_test "nexthop objects - invalid configurations" + ip nexthop del id 10 + ip nexthop del id 3 ip nexthop del id 2 ip nexthop del id 1 @@ -858,6 +876,70 @@ nexthop_obj_group_offload_test() simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 } +nexthop_obj_bucket_offload_test() +{ + # Test offload indication of nexthop buckets + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 + ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked as offloaded when should" + + # Invalidate nexthop id 1 + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked with trap when should" + + # Invalidate nexthop id 2 + ip neigh replace 2001:db8:1::2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked with trap when should" + + # Revalidate nexthop id 1 by changing its configuration + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop" + + # Revalidate nexthop id 2 by changing its neighbour + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour" + + log_test "nexthop bucket offload indication" + + ip neigh del 2001:db8:1::2 dev $swp1 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 10 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + nexthop_obj_blackhole_offload_test() { # Test offload indication of blackhole nexthop objects diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh deleted file mode 100755 index 0231205a7147..000000000000 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -lib_dir=$(dirname $0)/../../../../net/forwarding - -VXPORT=4789 - -ALL_TESTS=" - create_dot1d_and_dot1ad_vxlans -" -NUM_NETIFS=2 -source $lib_dir/lib.sh - -setup_prepare() -{ - swp1=${NETIFS[p1]} - swp2=${NETIFS[p2]} - - ip link set dev $swp1 up - ip link set dev $swp2 up -} - -cleanup() -{ - pre_cleanup - - ip link set dev $swp2 down - ip link set dev $swp1 down -} - -create_dot1d_and_dot1ad_vxlans() -{ - RET=0 - - ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ - vlan_default_pvid 0 mcast_snooping 0 - ip link set dev br0 up - - ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \ - "$VXPORT" nolearning noudpcsum tos inherit ttl 100 - ip link set dev vx100 up - - ip link set dev $swp1 master br0 - ip link set dev vx100 master br0 - bridge vlan add vid 100 dev vx100 pvid untagged - - ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0 - ip link set dev br1 up - - ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \ - "$VXPORT" nolearning noudpcsum tos inherit ttl 100 - ip link set dev vx200 up - - ip link set dev $swp2 master br1 - ip link set dev vx200 master br1 2>/dev/null - check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected" - - ip link set dev vx200 master br1 2>&1 >/dev/null \ - | grep -q mlxsw_spectrum - check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack" - - log_test "create 802.1d and 802.1ad VxLANs" - - ip link del dev vx200 - ip link del dev br1 - ip link del dev vx100 - ip link del dev br0 -} - -trap cleanup EXIT - -setup_prepare -setup_wait - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh index 553cb9fad508..b4dbda706c4d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -18,6 +18,7 @@ NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh switch_create() { @@ -166,7 +167,8 @@ matchall_sample_egress_test() RET=0 # It is forbidden in mlxsw driver to have matchall with sample action - # bound on egress + # bound on egress. Spectrum-1 specific restriction + [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return tc qdisc add dev $swp1 clsact diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh new file mode 100755 index 000000000000..57b05f042787 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -0,0 +1,627 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that packets are sampled when tc-sample is used and that reported +# metadata is correct. Two sets of hosts (with and without LAG) are used, since +# metadata extraction in mlxsw is a bit different when LAG is involved. +# +# +---------------------------------+ +---------------------------------+ +# | H1 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h3_lag | +# | | 192.0.2.1/28 | | | 192.0.2.17/28 | +# | | | | | | +# | | default via 192.0.2.2 | | | default via 192.0.2.18 | +# +----|----------------------------+ +----|----------------------------+ +# | | +# +----|-----------------------------------------|----------------------------+ +# | | 192.0.2.2/28 | 192.0.2.18/28 | +# | + $rp1 + $rp3_lag | +# | | +# | + $rp2 + $rp4_lag | +# | | 198.51.100.2/28 | 198.51.100.18/28 | +# +----|-----------------------------------------|----------------------------+ +# | | +# +----|----------------------------+ +----|----------------------------+ +# | | default via 198.51.100.2 | | | default via 198.51.100.18 | +# | | | | | | +# | | 198.51.100.1/28 | | | 198.51.100.17/28 | +# | + $h2 | | + $h4_lag | +# | H2 (vrf) | | H4 (vrf) | +# +---------------------------------+ +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + tc_sample_rate_test + tc_sample_max_rate_test + tc_sample_group_conflict_test + tc_sample_md_iif_test + tc_sample_md_lag_iif_test + tc_sample_md_oif_test + tc_sample_md_lag_oif_test + tc_sample_md_out_tc_test + tc_sample_md_out_tc_occ_test + tc_sample_md_latency_test + tc_sample_acl_group_conflict_test + tc_sample_acl_rate_test + tc_sample_acl_max_rate_test +" +NUM_NETIFS=8 +CAPTURE_FILE=$(mktemp) +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +# Available at https://github.com/Mellanox/libpsample +require_command psample + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/28 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 +} + +h2_destroy() +{ + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/28 +} + +h3_create() +{ + ip link set dev $h3 down + ip link add name ${h3}_bond type bond mode 802.3ad + ip link set dev $h3 master ${h3}_bond + + simple_if_init ${h3}_bond 192.0.2.17/28 + + ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18 +} + +h3_destroy() +{ + ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18 + + simple_if_fini ${h3}_bond 192.0.2.17/28 + + ip link set dev $h3 nomaster + ip link del dev ${h3}_bond +} + +h4_create() +{ + ip link set dev $h4 down + ip link add name ${h4}_bond type bond mode 802.3ad + ip link set dev $h4 master ${h4}_bond + + simple_if_init ${h4}_bond 198.51.100.17/28 + + ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18 +} + +h4_destroy() +{ + ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18 + + simple_if_fini ${h4}_bond 198.51.100.17/28 + + ip link set dev $h4 nomaster + ip link del dev ${h4}_bond +} + +router_create() +{ + ip link set dev $rp1 up + __addr_add_del $rp1 add 192.0.2.2/28 + tc qdisc add dev $rp1 clsact + + ip link set dev $rp2 up + __addr_add_del $rp2 add 198.51.100.2/28 + tc qdisc add dev $rp2 clsact + + ip link add name ${rp3}_bond type bond mode 802.3ad + ip link set dev $rp3 master ${rp3}_bond + __addr_add_del ${rp3}_bond add 192.0.2.18/28 + tc qdisc add dev $rp3 clsact + ip link set dev ${rp3}_bond up + + ip link add name ${rp4}_bond type bond mode 802.3ad + ip link set dev $rp4 master ${rp4}_bond + __addr_add_del ${rp4}_bond add 198.51.100.18/28 + tc qdisc add dev $rp4 clsact + ip link set dev ${rp4}_bond up +} + +router_destroy() +{ + ip link set dev ${rp4}_bond down + tc qdisc del dev $rp4 clsact + __addr_add_del ${rp4}_bond del 198.51.100.18/28 + ip link set dev $rp4 nomaster + ip link del dev ${rp4}_bond + + ip link set dev ${rp3}_bond down + tc qdisc del dev $rp3 clsact + __addr_add_del ${rp3}_bond del 192.0.2.18/28 + ip link set dev $rp3 nomaster + ip link del dev ${rp3}_bond + + tc qdisc del dev $rp2 clsact + __addr_add_del $rp2 del 198.51.100.2/28 + ip link set dev $rp2 down + + tc qdisc del dev $rp1 clsact + __addr_add_del $rp1 del 192.0.2.2/28 + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + h3=${NETIFS[p5]} + rp3=${NETIFS[p6]} + h4=${NETIFS[p7]} + rp4=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + h3_create + h4_create + router_create +} + +cleanup() +{ + pre_cleanup + + rm -f $CAPTURE_FILE + + router_destroy + h4_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +psample_capture_start() +{ + rm -f $CAPTURE_FILE + + psample &> $CAPTURE_FILE & + + sleep 1 +} + +psample_capture_stop() +{ + { kill %% && wait %%; } 2>/dev/null +} + +__tc_sample_rate_test() +{ + local desc=$1; shift + local dip=$1; shift + local pkts pct + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B $dip -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) + pct=$((100 * (pkts - 100) / 100)) + (( -25 <= pct && pct <= 25)) + check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + + log_test "tc sample rate ($desc)" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_rate_test() +{ + __tc_sample_rate_test "forward" 198.51.100.1 + __tc_sample_rate_test "local receive" 192.0.2.2 +} + +tc_sample_max_rate_test() +{ + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate $((35 * 10 ** 8)) group 1 + check_err $? "Failed to configure sampling rule with max rate" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate $((35 * 10 ** 8 + 1)) \ + group 1 &> /dev/null + check_fail $? "Managed to configure sampling rate above maximum" + + log_test "tc sample maximum rate" +} + +tc_sample_group_conflict_test() +{ + RET=0 + + # Test that two sampling rules cannot be configured on the same port + # with different groups. + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 2 &> /dev/null + check_fail $? "Managed to configure sampling rule with conflicting group" + + log_test "tc sample group conflict test" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_iif_test() +{ + local rp1_ifindex + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]') + grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected in-ifindex" + + log_test "tc sample iif" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_lag_iif_test() +{ + local rp3_ifindex + + RET=0 + + tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \ + -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]') + grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected in-ifindex" + + log_test "tc sample lag iif" + + tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_oif_test() +{ + local rp2_ifindex + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]') + grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-ifindex" + + log_test "tc sample oif" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_lag_oif_test() +{ + local rp4_ifindex + + RET=0 + + tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \ + -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]') + grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-ifindex" + + log_test "tc sample lag oif" + + tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_out_tc_test() +{ + RET=0 + + # Output traffic class is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + # By default, all the packets should go to the same traffic class (0). + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "out-tc 0 " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-tc (0)" + + # Map all priorities to highest traffic class (7) and check reported + # out-tc. + tc qdisc replace dev $rp2 root handle 1: \ + prio bands 3 priomap 0 0 0 0 0 0 0 0 + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "out-tc 7 " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-tc (7)" + + log_test "tc sample out-tc" + + tc qdisc del dev $rp2 root handle 1: + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_out_tc_occ_test() +{ + local backlog pct occ + + RET=0 + + # Output traffic class occupancy is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + # Configure a shaper on egress to create congestion. + tc qdisc replace dev $rp2 root handle 1: \ + tbf rate 1Mbit burst 256k limit 1M + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q & + + # Allow congestion to reach steady state. + sleep 10 + + backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]') + + # Kill mausezahn. + { kill %% && wait %%; } 2>/dev/null + + psample_capture_stop + + # Record last congestion sample. + occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \ + cut -d ' ' -f 16) + + pct=$((100 * (occ - backlog) / backlog)) + (( -1 <= pct && pct <= 1)) + check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%" + + log_test "tc sample out-tc-occ" + + tc qdisc del dev $rp2 root handle 1: + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_latency_test() +{ + RET=0 + + # Egress sampling not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "latency " $CAPTURE_FILE + check_err $? "Sampled packets do not have latency attribute" + + log_test "tc sample latency" + + tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall +} + +tc_sample_acl_group_conflict_test() +{ + RET=0 + + # Test that two flower sampling rules cannot be configured on the same + # port with different groups. + + # Policy-based sampling is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule with same group" + + tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \ + skip_sw action sample rate 1024 group 2 &> /dev/null + check_fail $? "Managed to configure sampling rule with conflicting group" + + log_test "tc sample (w/ flower) group conflict test" + + tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower +} + +__tc_sample_acl_rate_test() +{ + local bind=$1; shift + local port=$1; shift + local pkts pct + + RET=0 + + # Policy-based sampling is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) + pct=$((100 * (pkts - 100) / 100)) + (( -25 <= pct && pct <= 25)) + check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + + # Setup a filter that should not match any packet and make sure packets + # are not sampled. + tc filter del dev $port $bind protocol ip pref 1 handle 101 flower + + tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "group 1 " $CAPTURE_FILE + check_fail $? "Sampled packets when should not" + + log_test "tc sample (w/ flower) rate ($bind)" + + tc filter del dev $port $bind protocol ip pref 1 handle 101 flower +} + +tc_sample_acl_rate_test() +{ + __tc_sample_acl_rate_test ingress $rp1 + __tc_sample_acl_rate_test egress $rp2 +} + +tc_sample_acl_max_rate_test() +{ + RET=0 + + # Policy-based sampling is not supported on Spectrum-1. + [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate $((2 ** 24 - 1)) group 1 + check_err $? "Failed to configure sampling rule with max rate" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate $((2 ** 24)) \ + group 1 &> /dev/null + check_fail $? "Managed to configure sampling rate above maximum" + + log_test "tc sample (w/ flower) maximum rate" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index be0c1b5ee6b8..ba75c81cda91 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -11,14 +11,33 @@ ALL_TESTS=" nexthop_single_add_err_test nexthop_group_add_test nexthop_group_add_err_test + nexthop_res_group_add_test + nexthop_res_group_add_err_test nexthop_group_replace_test nexthop_group_replace_err_test + nexthop_res_group_replace_test + nexthop_res_group_replace_err_test + nexthop_res_group_idle_timer_test + nexthop_res_group_idle_timer_del_test + nexthop_res_group_increase_idle_timer_test + nexthop_res_group_decrease_idle_timer_test + nexthop_res_group_unbalanced_timer_test + nexthop_res_group_unbalanced_timer_del_test + nexthop_res_group_no_unbalanced_timer_test + nexthop_res_group_short_unbalanced_timer_test + nexthop_res_group_increase_unbalanced_timer_test + nexthop_res_group_decrease_unbalanced_timer_test + nexthop_res_group_force_migrate_busy_test nexthop_single_replace_test nexthop_single_replace_err_test nexthop_single_in_group_replace_test nexthop_single_in_group_replace_err_test + nexthop_single_in_res_group_replace_test + nexthop_single_in_res_group_replace_err_test nexthop_single_in_group_delete_test nexthop_single_in_group_delete_err_test + nexthop_single_in_res_group_delete_test + nexthop_single_in_res_group_delete_err_test nexthop_replay_test nexthop_replay_err_test " @@ -27,6 +46,7 @@ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh @@ -44,6 +64,28 @@ nexthop_check() return 0 } +nexthop_bucket_nhid_count_check() +{ + local group_id=$1; shift + local expected + local count + local nhid + local ret + + while (($# > 0)); do + nhid=$1; shift + expected=$1; shift + + count=$($IP nexthop bucket show id $group_id nhid $nhid | + grep "trap" | wc -l) + if ((expected != count)); then + return 1 + fi + done + + return 0 +} + nexthop_resource_check() { local expected_occ=$1; shift @@ -159,6 +201,71 @@ nexthop_group_add_err_test() nexthop_resource_set 9999 } +nexthop_res_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5 + nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 3 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 7 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Resilient nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Resilient nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + nexthop_group_replace_test() { RET=0 @@ -206,6 +313,411 @@ nexthop_group_replace_err_test() nexthop_resource_set 9999 } +nexthop_res_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 6 + + $IP nexthop replace id 10 group 1/2/3 type resilient + nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 3 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 9 + check_err $? "Wrong nexthop occupancy" + + log_test "Resilient nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_replace_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 6 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace + $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_bucket_nhid_count_check 10 1 3 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 3 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 9 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Resilient nexthop group replace failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace +} + +nexthop_res_mark_buckets_busy() +{ + local group_id=$1; shift + local nhid=$1; shift + local count=$1; shift + local index + + for index in $($IP -j nexthop bucket show id $group_id nhid $nhid | + jq '.[].bucket.index' | head -n ${count:--0}) + do + echo $group_id $index \ + > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity + done +} + +nexthop_res_num_nhid_buckets() +{ + local group_id=$1; shift + local nhid=$1; shift + + $IP -j nexthop bucket show id $group_id nhid $nhid | jq length +} + +nexthop_res_group_idle_timer_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "Group expected to be unbalanced" + + sleep 6 + + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after idle timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_idle_timer_del_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,50/2,50/3,1 \ + type resilient buckets 8 idle_timer 6 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient + + nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0 + check_err $? "Group expected to be unbalanced" + + sleep 4 + + # Deletion prompts group replacement. Check that the bucket timers + # are kept. + $IP nexthop delete id 3 + + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "Group expected to still be unbalanced" + + sleep 4 + + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after idle timer (with delete)" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_increase_timer_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8 + sleep 4 + + # 6 seconds, past the original timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group still expected to be unbalanced" + + sleep 4 + + # 10 seconds, past the new timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer increase" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_decrease_timer_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4 + sleep 4 + + # 6 seconds, past the new timer, before the old timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer decrease" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_increase_timer_del_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,100/2,100/3,1 \ + type resilient buckets 8 $timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8 + sleep 4 + + # 6 seconds, past the original timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group still expected to be unbalanced" + + sleep 4 + + # 10 seconds, past the new timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer increase" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_increase_idle_timer_test() +{ + __nexthop_res_group_increase_timer_test idle_timer +} + +nexthop_res_group_decrease_idle_timer_test() +{ + __nexthop_res_group_decrease_timer_test idle_timer +} + +nexthop_res_group_unbalanced_timer_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 6 unbalanced_timer 10 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + for i in 1 2; do + sleep 4 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "$i: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + done + + # 3 x sleep 4 > unbalanced timer 10 + sleep 4 + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_unbalanced_timer_del_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \ + buckets 8 idle_timer 6 unbalanced_timer 10 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient + + # Check that NH delete does not reset unbalanced time. + sleep 4 + $IP nexthop delete id 3 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "1: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + + sleep 4 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "2: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + + # 3 x sleep 4 > unbalanced timer 10 + sleep 4 + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced timer (with delete)" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_no_unbalanced_timer_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + for i in $(seq 3); do + sleep 60 + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "$i: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + done + + log_test "Buckets never force-migrated without unbalanced timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_short_unbalanced_timer_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 120 unbalanced_timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 5 + + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced < idle timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_increase_unbalanced_timer_test() +{ + __nexthop_res_group_increase_timer_test unbalanced_timer +} + +nexthop_res_group_decrease_unbalanced_timer_test() +{ + __nexthop_res_group_decrease_timer_test unbalanced_timer +} + +nexthop_res_group_force_migrate_busy_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 120 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + $IP nexthop replace id 10 group 2 type resilient + nexthop_bucket_nhid_count_check 10 2 8 + check_err $? "All buckets expected to have migrated" + + log_test "Busy buckets force-migrated when NH removed" + + $IP nexthop flush &> /dev/null +} + nexthop_single_replace_test() { RET=0 @@ -299,6 +811,63 @@ nexthop_single_in_group_replace_err_test() nexthop_resource_set 9999 } +nexthop_single_in_res_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in resilient group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_res_group_replace_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_bucket_nhid_count_check 10 1 2 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in resilient group failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace +} + nexthop_single_in_group_delete_test() { RET=0 @@ -346,6 +915,57 @@ nexthop_single_in_group_delete_err_test() nexthop_resource_set 9999 } +nexthop_single_in_res_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 2 4 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in resilient group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_res_group_delete_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 type resilient buckets 6 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace + $IP nexthop del id 1 + + # We failed to replace the two nexthop buckets that were originally + # assigned to nhid 1. + nexthop_bucket_nhid_count_check 10 2 2 3 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 8 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in resilient group failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace +} + nexthop_replay_test() { RET=0 diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh new file mode 100755 index 000000000000..ee10b1a8933c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh @@ -0,0 +1,181 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the psample module. It makes use of netdevsim +# which periodically generates "sampled" packets. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + psample_enable_test + psample_group_num_test + psample_md_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/ +CAPTURE_FILE=$(mktemp) +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +# Available at https://github.com/Mellanox/libpsample +require_command psample + +psample_capture() +{ + rm -f $CAPTURE_FILE + + timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE +} + +psample_enable_test() +{ + RET=0 + + echo 1 > $PSAMPLE_DIR/enable + check_err $? "Failed to enable sampling when should not" + + echo 1 > $PSAMPLE_DIR/enable 2>/dev/null + check_fail $? "Sampling enablement succeeded when should fail" + + psample_capture + if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then + check_err 1 "Failed to capture sampled packets" + fi + + echo 0 > $PSAMPLE_DIR/enable + check_err $? "Failed to disable sampling when should not" + + echo 0 > $PSAMPLE_DIR/enable 2>/dev/null + check_fail $? "Sampling disablement succeeded when should fail" + + psample_capture + if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then + check_err 1 "Captured sampled packets when should not" + fi + + log_test "psample enable / disable" +} + +psample_group_num_test() +{ + RET=0 + + echo 1234 > $PSAMPLE_DIR/group_num + echo 1 > $PSAMPLE_DIR/enable + + psample_capture + grep -q -e "group 1234" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong group number" + + # New group number should only be used after disable / enable. + echo 4321 > $PSAMPLE_DIR/group_num + + psample_capture + grep -q -e "group 4321" $CAPTURE_FILE + check_fail $? "Group number changed while sampling is active" + + echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable + + psample_capture + grep -q -e "group 4321" $CAPTURE_FILE + check_err $? "Group number did not change after restarting sampling" + + log_test "psample group number" + + echo 0 > $PSAMPLE_DIR/enable +} + +psample_md_test() +{ + RET=0 + + echo 1 > $PSAMPLE_DIR/enable + + echo 1234 > $PSAMPLE_DIR/in_ifindex + echo 4321 > $PSAMPLE_DIR/out_ifindex + psample_capture + + grep -q -e "in-ifindex 1234" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong in-ifindex" + + grep -q -e "out-ifindex 4321" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong out-ifindex" + + echo 5 > $PSAMPLE_DIR/out_tc + psample_capture + + grep -q -e "out-tc 5" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong out-tc" + + echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc + psample_capture + + grep -q -e "out-tc " $CAPTURE_FILE + check_fail $? "Sampled packets reported with out-tc when should not" + + echo 1 > $PSAMPLE_DIR/out_tc + echo 10000 > $PSAMPLE_DIR/out_tc_occ_max + psample_capture + + grep -q -e "out-tc-occ " $CAPTURE_FILE + check_err $? "Sampled packets not reported with out-tc-occ when should" + + echo 0 > $PSAMPLE_DIR/out_tc_occ_max + psample_capture + + grep -q -e "out-tc-occ " $CAPTURE_FILE + check_fail $? "Sampled packets reported with out-tc-occ when should not" + + echo 10000 > $PSAMPLE_DIR/latency_max + psample_capture + + grep -q -e "latency " $CAPTURE_FILE + check_err $? "Sampled packets not reported with latency when should" + + echo 0 > $PSAMPLE_DIR/latency_max + psample_capture + + grep -q -e "latency " $CAPTURE_FILE + check_fail $? "Sampled packets reported with latency when should not" + + log_test "psample metadata" + + echo 0 > $PSAMPLE_DIR/enable +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + set +e +} + +cleanup() +{ + pre_cleanup + rm -f $CAPTURE_FILE + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 32b87cc77c8e..7bd7e776c266 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -8,10 +8,13 @@ /x86_64/debug_regs /x86_64/evmcs_test /x86_64/get_cpuid_test +/x86_64/get_msr_index_features /x86_64/kvm_pv_test +/x86_64/hyperv_clock /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test +/x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/smm_test /x86_64/state_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a6d61f451f88..67eebb53235f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -39,12 +39,15 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 2d7eb6989e83..0f4258eaa629 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_DEV_PATH "/dev/kvm" #define KVM_MAX_VCPUS 512 /* @@ -133,6 +134,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e5fbf16f725b..b8849a1aca79 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1697,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) { int ret; - ret = ioctl(vm->fd, cmd, arg); + ret = _vm_ioctl(vm, cmd, arg); TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", cmd, ret, errno, strerror(errno)); } +int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + return ioctl(vm->fd, cmd, arg); +} + /* * KVM system ioctl * diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 34465dc562d8..91ce1b5d480b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -10,8 +10,6 @@ #include "sparsebit.h" -#define KVM_DEV_PATH "/dev/kvm" - struct userspace_mem_region { struct kvm_userspace_memory_region region; struct sparsebit *unused_phy_pages; diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c new file mode 100644 index 000000000000..cb953df4d7d0 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_GET_MSR_INDEX_LIST and + * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static int kvm_num_index_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +static void test_get_msr_index(void) +{ + int old_res, res, kvm_fd, r; + struct kvm_msr_list *list; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_index_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_index_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0])); + list->nmsrs = old_res; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + + TEST_ASSERT(r == 0, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", + r); + TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical"); + free(list); + + close(kvm_fd); +} + +static int kvm_num_feature_msrs(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + TEST_ASSERT(r == -1 && errno == E2BIG, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i", + r); + + r = list->nmsrs; + free(list); + return r; +} + +struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs) +{ + struct kvm_msr_list *list; + int r; + + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + + TEST_ASSERT(r == 0, + "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", + r); + + return list; +} + +static void test_get_msr_feature(void) +{ + int res, old_res, i, kvm_fd; + struct kvm_msr_list *feature_list; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + old_res = kvm_num_feature_msrs(kvm_fd, 0); + TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); + + if (old_res != 1) { + res = kvm_num_feature_msrs(kvm_fd, 1); + TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); + TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); + } + + feature_list = kvm_get_msr_feature_list(kvm_fd, old_res); + TEST_ASSERT(old_res == feature_list->nmsrs, + "Unmatching number of msr indexes"); + + for (i = 0; i < feature_list->nmsrs; i++) + kvm_get_feature_msr(feature_list->indices[i]); + + free(feature_list); + close(kvm_fd); +} + +int main(int argc, char *argv[]) +{ + if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES)) + test_get_msr_feature(); + + test_get_msr_index(); +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c new file mode 100644 index 000000000000..ffbc4555c6e2 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021, Red Hat, Inc. + * + * Tests for Hyper-V clocksources + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; +} __packed; + +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 +#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 +#define HV_X64_MSR_TSC_FREQUENCY 0x40000022 +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 + +/* Simplified mul_u64_u64_shr() */ +static inline u64 mul_u64_u64_shr64(u64 a, u64 b) +{ + union { + u64 ll; + struct { + u32 low, high; + } l; + } rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + rh.l.low = c = rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + return rh.ll; +} + +static inline void nop_loop(void) +{ + int i; + + for (i = 0; i < 1000000; i++) + asm volatile("nop"); +} + +static inline void check_tsc_msr_rdtsc(void) +{ + u64 tsc_freq, r1, r2, t1, t2; + s64 delta_ns; + + tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY); + GUEST_ASSERT(tsc_freq > 0); + + /* First, check MSR-based clocksource */ + r1 = rdtsc(); + t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + nop_loop(); + r2 = rdtsc(); + t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + + GUEST_ASSERT(r2 > r1 && t2 > t1); + + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); + if (delta_ns < 0) + delta_ns = -delta_ns; + + /* 1% tolerance */ + GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100); +} + +static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page) +{ + u64 r1, r2, t1, t2; + + /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */ + t1 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; + r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + + /* 10 ms tolerance */ + GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000); + nop_loop(); + + t2 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset; + r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000); +} + +static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa) +{ + u64 tsc_scale, tsc_offset; + + /* Set Guest OS id to enable Hyper-V emulation */ + GUEST_SYNC(1); + wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48); + GUEST_SYNC(2); + + check_tsc_msr_rdtsc(); + + GUEST_SYNC(3); + + /* Set up TSC page is disabled state, check that it's clean */ + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa); + GUEST_ASSERT(tsc_page->tsc_sequence == 0); + GUEST_ASSERT(tsc_page->tsc_scale == 0); + GUEST_ASSERT(tsc_page->tsc_offset == 0); + + GUEST_SYNC(4); + + /* Set up TSC page is enabled state */ + wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1); + GUEST_ASSERT(tsc_page->tsc_sequence != 0); + + GUEST_SYNC(5); + + check_tsc_msr_tsc_page(tsc_page); + + GUEST_SYNC(6); + + tsc_offset = tsc_page->tsc_offset; + /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */ + GUEST_SYNC(7); + GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset); + + nop_loop(); + + /* + * Enable Re-enlightenment and check that TSC page stays constant across + * KVM_SET_CLOCK. + */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1); + tsc_offset = tsc_page->tsc_offset; + tsc_scale = tsc_page->tsc_scale; + GUEST_SYNC(8); + GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset); + GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale); + + GUEST_SYNC(9); + + check_tsc_msr_tsc_page(tsc_page); + + /* + * Disable re-enlightenment and TSC page, check that KVM doesn't update + * it anymore. + */ + wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); + wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0); + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0); + memset(tsc_page, 0, sizeof(*tsc_page)); + + GUEST_SYNC(10); + GUEST_ASSERT(tsc_page->tsc_sequence == 0); + GUEST_ASSERT(tsc_page->tsc_offset == 0); + GUEST_ASSERT(tsc_page->tsc_scale == 0); + + GUEST_DONE(); +} + +#define VCPU_ID 0 + +static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) +{ + u64 tsc_freq, r1, r2, t1, t2; + s64 delta_ns; + + tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY); + TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); + + /* First, check MSR-based clocksource */ + r1 = rdtsc(); + t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + nop_loop(); + r2 = rdtsc(); + t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + + TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); + + /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */ + delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq); + if (delta_ns < 0) + delta_ns = -delta_ns; + + /* 1% tolerance */ + TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100, + "Elapsed time does not match (MSR=%ld, TSC=%ld)", + (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq); +} + +int main(void) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + vm_vaddr_t tsc_page_gva; + int stage; + + vm = vm_create_default(VCPU_ID, 0, guest_main); + run = vcpu_state(vm, VCPU_ID); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, + "TSC page has to be page aligned\n"); + vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); + + host_check_tsc_msr_rdtsc(vm); + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + /* Keep in sync with guest_main() */ + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", + stage); + goto out; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, + "Stage %d: Unexpected register values vmexit, got %lx", + stage, (ulong)uc.args[1]); + + /* Reset kvmclock triggering TSC page update */ + if (stage == 7 || stage == 8 || stage == 10) { + struct kvm_clock_data clock = {0}; + + vm_ioctl(vm, KVM_SET_CLOCK, &clock); + } + } + +out: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c new file mode 100644 index 000000000000..12c558fc8074 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that KVM_SET_BOOT_CPU_ID works as intended + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#define _GNU_SOURCE /* for program_invocation_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define N_VCPU 2 +#define VCPU_ID0 0 +#define VCPU_ID1 1 + +static uint32_t get_bsp_flag(void) +{ + return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; +} + +static void guest_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() != 0); + + GUEST_DONE(); +} + +static void guest_not_bsp_vcpu(void *arg) +{ + GUEST_SYNC(1); + + GUEST_ASSERT(get_bsp_flag() == 0); + + GUEST_DONE(); +} + +static void test_set_boot_busy(struct kvm_vm *vm) +{ + int res; + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0); + TEST_ASSERT(res == -1 && errno == EBUSY, + "KVM_SET_BOOT_CPU_ID set while running vm"); +} + +static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct ucall uc; + int stage; + + for (stage = 0; stage < 2; stage++) { + + vcpu_run(vm, vcpuid); + + switch (get_ucall(vm, vcpuid, &uc)) { + case UCALL_SYNC: + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1, + "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); + test_set_boot_busy(vm); + break; + case UCALL_DONE: + TEST_ASSERT(stage == 1, + "Expected GUEST_DONE in stage 2, got stage %d", + stage); + break; + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", + (const char *)uc.args[0], __FILE__, + uc.args[1], uc.args[2], uc.args[3]); + default: + TEST_ASSERT(false, "Unexpected exit: %s", + exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + } + } +} + +static struct kvm_vm *create_vm(void) +{ + struct kvm_vm *vm; + uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2; + uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; + + pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages); + vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_create_irqchip(vm); + + return vm; +} + +static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code) +{ + if (bsp_code) + vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu); + else + vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu); + + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); +} + +static void run_vm_bsp(uint32_t bsp_vcpu) +{ + struct kvm_vm *vm; + bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1; + + vm = create_vm(); + + if (is_bsp_vcpu1) + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + + add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1); + add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1); + + run_vcpu(vm, VCPU_ID0); + run_vcpu(vm, VCPU_ID1); + + kvm_vm_free(vm); +} + +static void check_set_bsp_busy(void) +{ + struct kvm_vm *vm; + int res; + + vm = create_vm(); + + add_x86_vcpu(vm, VCPU_ID0, true); + add_x86_vcpu(vm, VCPU_ID1, false); + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu"); + + run_vcpu(vm, VCPU_ID0); + run_vcpu(vm, VCPU_ID1); + + res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); + TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + print_skip("set_boot_cpu_id not available"); + return 0; + } + + run_vm_bsp(VCPU_ID0); + run_vm_bsp(VCPU_ID1); + run_vm_bsp(VCPU_ID0); + + check_set_bsp_busy(); +} diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index d98fb85e201c..56dd0c6f2e96 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -19,10 +19,39 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture" -IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture" - -ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}" +IPV4_TESTS=" + ipv4_fcnal + ipv4_grp_fcnal + ipv4_res_grp_fcnal + ipv4_withv6_fcnal + ipv4_fcnal_runtime + ipv4_large_grp + ipv4_large_res_grp + ipv4_compat_mode + ipv4_fdb_grp_fcnal + ipv4_torture + ipv4_res_torture +" + +IPV6_TESTS=" + ipv6_fcnal + ipv6_grp_fcnal + ipv6_res_grp_fcnal + ipv6_fcnal_runtime + ipv6_large_grp + ipv6_large_res_grp + ipv6_compat_mode + ipv6_fdb_grp_fcnal + ipv6_torture + ipv6_res_torture +" + +ALL_TESTS=" + basic + basic_res + ${IPV4_TESTS} + ${IPV6_TESTS} +" TESTS="${ALL_TESTS}" VERBOSE=0 PAUSE_ON_FAIL=no @@ -232,6 +261,19 @@ check_nexthop() check_output "${out}" "${expected}" } +check_nexthop_bucket() +{ + local nharg="$1" + local expected="$2" + local out + + # remove the idle time since we cannot match it + out=$($IP nexthop bucket ${nharg} \ + | sed s/idle_time\ [0-9.]*\ // 2>/dev/null) + + check_output "${out}" "${expected}" +} + check_route() { local pfx="$1" @@ -308,6 +350,25 @@ check_large_grp() log_test $? 0 "Dump large (x$ecmp) ecmp groups" } +check_large_res_grp() +{ + local ipv=$1 + local buckets=$2 + local ipstr="" + + if [ $ipv -eq 4 ]; then + ipstr="172.16.1.2" + else + ipstr="2001:db8:91::2" + fi + + # create a resilient group with $buckets buckets and dump them + run_cmd "$IP nexthop add id 100 via $ipstr dev veth1" + run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets" + run_cmd "$IP nexthop bucket list" + log_test $? 0 "Dump large (x$buckets) nexthop buckets" +} + start_ip_monitor() { local mtype=$1 @@ -344,6 +405,15 @@ check_nexthop_fdb_support() fi } +check_nexthop_res_support() +{ + $IP nexthop help 2>&1 | grep -q resilient + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing resilient nexthop group support" + return $ksft_skip + fi +} + ipv6_fdb_grp_fcnal() { local rc @@ -666,6 +736,70 @@ ipv6_grp_fcnal() log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" } +ipv6_res_grp_fcnal() +{ + local rc + + echo + echo "IPv6 resilient groups functional" + echo "--------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # + # migration of nexthop buckets - equal weights + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0" + + run_cmd "$IP nexthop del id 63" + check_nexthop "id 102" \ + "id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 62 id 102 index 1 nhid 62" + log_test $? 0 "Nexthop buckets updated when entry is deleted" + + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 63 id 102 index 1 nhid 62" + log_test $? 0 "Nexthop buckets updated after replace" + + $IP nexthop flush >/dev/null 2>&1 + + # + # migration of nexthop buckets - unequal weights + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0" + + run_cmd "$IP nexthop del id 63" + check_nexthop "id 102" \ + "id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" + log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP" + + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" + log_test $? 0 "Nexthop buckets updated after replace - nECMP" +} + ipv6_fcnal_runtime() { local rc @@ -824,6 +958,22 @@ ipv6_large_grp() $IP nexthop flush >/dev/null 2>&1 } +ipv6_large_res_grp() +{ + echo + echo "IPv6 large resilient group (128k buckets)" + echo "-----------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + check_large_res_grp 6 $((128 * 1024)) + + $IP nexthop flush >/dev/null 2>&1 +} + ipv6_del_add_loop1() { while :; do @@ -874,11 +1024,67 @@ ipv6_torture() sleep 300 kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null # if we did not crash, success log_test 0 0 "IPv6 torture test" } +ipv6_res_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 type resilient + done >/dev/null 2>&1 +} + +ipv6_res_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv6 runtime resilient nexthop group torture" + echo "--------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0" + run_cmd "$IP route add 2001:db8:101::1 nhid 102" + run_cmd "$IP route add 2001:db8:101::2 nhid 102" + + ipv6_del_add_loop1 & + pid1=$! + ipv6_res_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn -6 veth1 \ + -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \ + -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null + + # if we did not crash, success + log_test 0 0 "IPv6 resilient nexthop group torture test" +} ipv4_fcnal() { @@ -1038,6 +1244,70 @@ ipv4_grp_fcnal() log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" } +ipv4_res_grp_fcnal() +{ + local rc + + echo + echo "IPv4 resilient groups functional" + echo "--------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # + # migration of nexthop buckets - equal weights + # + run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0" + + run_cmd "$IP nexthop del id 13" + check_nexthop "id 102" \ + "id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 12 id 102 index 1 nhid 12" + log_test $? 0 "Nexthop buckets updated when entry is deleted" + + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 13 id 102 index 1 nhid 12" + log_test $? 0 "Nexthop buckets updated after replace" + + $IP nexthop flush >/dev/null 2>&1 + + # + # migration of nexthop buckets - unequal weights + # + run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0" + + run_cmd "$IP nexthop del id 13" + check_nexthop "id 102" \ + "id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12" + log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP" + + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12" + log_test $? 0 "Nexthop buckets updated after replace - nECMP" +} + ipv4_withv6_fcnal() { local lladdr @@ -1259,6 +1529,22 @@ ipv4_large_grp() $IP nexthop flush >/dev/null 2>&1 } +ipv4_large_res_grp() +{ + echo + echo "IPv4 large resilient group (128k buckets)" + echo "-----------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + check_large_res_grp 4 $((128 * 1024)) + + $IP nexthop flush >/dev/null 2>&1 +} + sysctl_nexthop_compat_mode_check() { local sysctlname="net.ipv4.nexthop_compat_mode" @@ -1476,11 +1762,68 @@ ipv4_torture() sleep 300 kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null # if we did not crash, success log_test 0 0 "IPv4 torture test" } +ipv4_res_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 type resilient + done >/dev/null 2>&1 +} + +ipv4_res_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv4 runtime resilient nexthop group torture" + echo "--------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0" + run_cmd "$IP route add 172.16.101.1 nhid 102" + run_cmd "$IP route add 172.16.101.2 nhid 102" + + ipv4_del_add_loop1 & + pid1=$! + ipv4_res_grp_replace_loop & + pid2=$! + ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + pid3=$! + ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + pid4=$! + ip netns exec me mausezahn veth1 \ + -B 172.16.101.2 -A 172.16.1.1 -c 0 \ + -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null + + # if we did not crash, success + log_test 0 0 "IPv4 resilient nexthop group torture test" +} + basic() { echo @@ -1592,6 +1935,204 @@ basic() $IP nexthop flush >/dev/null 2>&1 } +check_nexthop_buckets_balance() +{ + local nharg=$1; shift + local ret + + while (($# > 0)); do + local selector=$1; shift + local condition=$1; shift + local count + + count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length) + (( $count $condition )) + ret=$? + if ((ret != 0)); then + return $ret + fi + done + + return 0 +} + +basic_res() +{ + echo + echo "Basic resilient nexthop group functional tests" + echo "----------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + run_cmd "$IP nexthop add id 1 dev veth1" + + # + # resilient nexthop group addition + # + + run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8" + log_test $? 0 "Add a nexthop group with default parameters" + + run_cmd "$IP nexthop get id 101" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Get a nexthop group with default parameters" + + run_cmd "$IP nexthop add id 102 group 1 type resilient + buckets 4 idle_timer 100 unbalanced_timer 5" + run_cmd "$IP nexthop get id 102" + check_nexthop "id 102" \ + "id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0" + log_test $? 0 "Get a nexthop group with non-default parameters" + + run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0" + log_test $? 2 "Add a nexthop group with 0 buckets" + + # + # resilient nexthop group replacement + # + + run_cmd "$IP nexthop replace id 101 group 1 type resilient + buckets 8 idle_timer 240 unbalanced_timer 80" + log_test $? 0 "Replace nexthop group parameters" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing parameters" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512" + log_test $? 0 "Replace idle timer" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing idle timer" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256" + log_test $? 0 "Replace unbalanced timer" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing unbalanced timer" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient" + log_test $? 0 "Replace with no parameters" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing no parameters" + + run_cmd "$IP nexthop replace id 101 group 1" + log_test $? 2 "Replace nexthop group type - implicit" + + run_cmd "$IP nexthop replace id 101 group 1 type mpath" + log_test $? 2 "Replace nexthop group type - explicit" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024" + log_test $? 2 "Replace number of nexthop buckets" + + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing with invalid parameters" + + # + # resilient nexthop buckets dump + # + + $IP nexthop flush >/dev/null 2>&1 + run_cmd "$IP nexthop add id 1 dev veth1" + run_cmd "$IP nexthop add id 2 dev veth3" + run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4" + run_cmd "$IP nexthop add id 201 group 1/2" + + check_nexthop_bucket "" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets" + + check_nexthop_bucket "list id 101" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets in a group" + + (( $($IP -j nexthop bucket list id 101 | + jq '[.[] | select(.bucket.idle_time > 0 and + .bucket.idle_time < 2)] | length') == 4 )) + log_test $? 0 "All nexthop buckets report a positive near-zero idle time" + + check_nexthop_bucket "list dev veth1" \ + "id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets with a specific nexthop device" + + check_nexthop_bucket "list nhid 2" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2" + log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier" + + run_cmd "$IP nexthop bucket list id 111" + log_test $? 2 "Dump all nexthop buckets in a non-existent group" + + run_cmd "$IP nexthop bucket list id 201" + log_test $? 2 "Dump all nexthop buckets in a non-resilient group" + + run_cmd "$IP nexthop bucket list dev bla" + log_test $? 255 "Dump all nexthop buckets using a non-existent device" + + run_cmd "$IP nexthop bucket list groups" + log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword" + + run_cmd "$IP nexthop bucket list fdb" + log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword" + + # + # resilient nexthop buckets get requests + # + + check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2" + log_test $? 0 "Get a valid nexthop bucket" + + run_cmd "$IP nexthop bucket get id 101 index 999" + log_test $? 2 "Get a nexthop bucket with valid group, but invalid index" + + run_cmd "$IP nexthop bucket get id 201 index 0" + log_test $? 2 "Get a nexthop bucket from a non-resilient group" + + run_cmd "$IP nexthop bucket get id 999 index 0" + log_test $? 2 "Get a nexthop bucket from a non-existent group" + + # + # tests for bucket migration + # + + $IP nexthop flush >/dev/null 2>&1 + + run_cmd "$IP nexthop add id 1 dev veth1" + run_cmd "$IP nexthop add id 2 dev veth3" + run_cmd "$IP nexthop add id 101 + group 1/2 type resilient buckets 10 + idle_timer 1 unbalanced_timer 20" + + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 5" \ + "nhid 2" "== 5" + log_test $? 0 "Initial bucket allocation" + + run_cmd "$IP nexthop replace id 101 + group 1,2/2,3 type resilient" + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 4" \ + "nhid 2" "== 6" + log_test $? 0 "Bucket allocation after replace" + + # Check that increase in idle timer does not make buckets appear busy. + run_cmd "$IP nexthop replace id 101 + group 1,2/2,3 type resilient + idle_timer 10" + run_cmd "$IP nexthop replace id 101 + group 1/2 type resilient" + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 5" \ + "nhid 2" "== 5" + log_test $? 0 "Buckets migrated after idle timer change" + + $IP nexthop flush >/dev/null 2>&1 +} + ################################################################################ # usage diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh new file mode 100755 index 000000000000..5148d97a5df8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh @@ -0,0 +1,366 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + h1.10 | | + h2.20 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | + $h1 | | + $h2 | +# | | | | | | +# +----|---------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|--------------------+ +# | SW | | | +# | +--|-------------------------------+ +----------------|------------------+ | +# | | + $swp1 BR1 (802.1ad) | | BR2 (802.1d) + $swp2 | | +# | | vid 100 pvid untagged | | | | | +# | | | | + $swp2.20 | | +# | | | | | | +# | | + vx100 (vxlan) | | + vx200 (vxlan) | | +# | | local 192.0.2.17 | | local 192.0.2.17 | | +# | | remote 192.0.2.34 | | remote 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | id 2000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | +# | +--------------------------------- + +-----------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|-----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR3 (802.1ad) | | | | BR3 (802.1d) | | +# | | + vx100 (vxlan) | | | | + vx200 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | | | +# | | | | | | + w1.20 | | +# | | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | | | | | | | | | +# | | | | | | | | | | +# | | + w2.10 | | | | + w2.20 | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 +} + +h1_destroy() +{ + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 20 v$h2 192.0.2.2/28 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + #### BR1 #### + ip link add name br1 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + #### BR2 #### + ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br2 address $(mac_get $swp2) + ip link set dev br2 up + + ip link set dev $rp1 up + rp1_set_addr + + #### VX100 #### + ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + #### VX200 #### + ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx200 up + + ip link set dev vx200 master br2 + + ip link set dev $swp2 up + ip link add name $swp2.20 link $swp2 type vlan id 20 + ip link set dev $swp2.20 master br2 + ip link set dev $swp2.20 up + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx200 nomaster + ip link set dev vx200 down + ip link del dev vx200 + + ip link del dev $swp2.20 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br2 down + ip link del dev br2 + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local vxlan_name=$1; shift + local vxlan_id=$1; shift + local vlan_id=$1; shift + local host_addr=$1; shift + local nh_addr=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br3 type bridge vlan_filtering 0 + ip link set dev br3 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br3 + ip link set dev w1 up + + ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \ + dstport "$VXPORT" + ip link set dev $vxlan_name up + bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev $vxlan_name master br3 + tc qdisc add dev $vxlan_name clsact + + simple_if_init w2 + vlan_create w2 $vlan_id vw2 $host_addr/28 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \ + 192.0.2.33 + + in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \ + 192.0.2.49 + + in_ns ns2 ip link add name w1.20 link w1 type vlan id 20 + in_ns ns2 ip link set dev w1.20 master br3 + in_ns ns2 ip link set dev w1.20 up +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge" + ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh index 66496659bea7..e134a5f529c9 100644 --- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh +++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh @@ -224,7 +224,7 @@ fib_ipv4_plen_test() ip -n $ns link set dev dummy1 up # Add two routes with the same key and different prefix length and - # make sure both are in hardware. It can be verfied that both are + # make sure both are in hardware. It can be verified that both are # sharing the same leaf by checking the /proc/net/fib_trie ip -n $ns route add 192.0.2.0/24 dev dummy1 ip -n $ns route add 192.0.2.0/25 dev dummy1 diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh new file mode 100755 index 000000000000..088b65e64d66 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -0,0 +1,361 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test traffic distribution when a wECMP route forwards traffic to two GRE +# tunnels. +# +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|------------------------+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (gre) + g1b (gre) | +# | loc=192.0.2.65 loc=192.0.2.81 | +# | rem=192.0.2.66 --. rem=192.0.2.82 --. | +# | tos=inherit | tos=inherit | | +# | .------------------' | | +# | | .------------------' | +# | v v | +# | + $ul1.111 (vlan) + $ul1.222 (vlan) | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | \ / | +# | \________________/ | +# | | | +# | + $ul1 | +# +------------|-------------------------------+ +# | +# +------------|-------------------------------+ +# | SW2 + $ul2 | +# | _______|________ | +# | / \ | +# | / \ | +# | + $ul2.111 (vlan) + $ul2.222 (vlan) | +# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 | +# | | | | +# | | '------------------. | +# | '------------------. | | +# | + g2a (gre) | + g2b (gre) | | +# | loc=192.0.2.66 | loc=192.0.2.82 | | +# | rem=192.0.2.65 --' rem=192.0.2.81 --' | +# | tos=inherit tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | 2001:db8:2::1/64 | | +# +-------------------|------------------------+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# | 2001:db8:2::2/64 | +# +-------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_ipv4 + multipath_ipv6 + multipath_ipv6_l4 +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 + ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +sw1_create() +{ + simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $ul1 v$ol1 + vlan_create $ul1 111 v$ol1 192.0.2.129/28 + vlan_create $ul1 222 v$ol1 192.0.2.145/28 + + tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1 + __simple_if_init g1a v$ol1 192.0.2.65/32 + ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + + tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1 + __simple_if_init g1b v$ol1 192.0.2.81/32 + ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + + ip -6 nexthop add id 101 dev g1a + ip -6 nexthop add id 102 dev g1b + ip nexthop add id 103 group 101/102 type resilient buckets 512 \ + idle_timer 0 + + ip route add vrf v$ol1 192.0.2.16/28 nhid 103 + ip route add vrf v$ol1 2001:db8:2::/64 nhid 103 +} + +sw1_destroy() +{ + ip route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 192.0.2.16/28 + + ip nexthop del id 103 + ip -6 nexthop del id 102 + ip -6 nexthop del id 101 + + ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + __simple_if_fini g1b 192.0.2.81/32 + tunnel_destroy g1b + + ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 222 + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64 +} + +sw2_create() +{ + simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + vlan_create $ul2 222 v$ol2 192.0.2.146/28 + + tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 + __simple_if_init g2a v$ol2 192.0.2.66/32 + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + + tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2 + __simple_if_init g2b v$ol2 192.0.2.82/32 + ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + + ip -6 nexthop add id 201 dev g2a + ip -6 nexthop add id 202 dev g2b + ip nexthop add id 203 group 201/202 type resilient buckets 512 \ + idle_timer 0 + + ip route add vrf v$ol2 192.0.2.0/28 nhid 203 + ip route add vrf v$ol2 2001:db8:1::/64 nhid 203 + + tc qdisc add dev $ul2 clsact + tc filter add dev $ul2 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul2 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw2_destroy() +{ + tc qdisc del dev $ul2 clsact + + ip route del vrf v$ol2 2001:db8:1::/64 + ip route del vrf v$ol2 192.0.2.0/28 + + ip nexthop del id 203 + ip -6 nexthop del id 202 + ip -6 nexthop del id 201 + + ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + __simple_if_fini g2b 192.0.2.82/32 + tunnel_destroy g2b + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 222 + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 + ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +multipath4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 0 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + # Generate 16384 echo requests, each with a random flow label. + for ((i=0; i < 16384; ++i)); do + ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null + done + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +multipath6_l4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +multipath_ipv4() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6() +{ + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6_l4() +{ + log_info "Running IPv6 L4 hash multipath tests" + multipath6_l4_test "ECMP" 1 1 + multipath6_l4_test "Weighted MP 2:1" 2 1 + multipath6_l4_test "Weighted MP 11:45" 11 45 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index be71012b8fc5..05c05e02bade 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -353,6 +353,11 @@ wait_for_offload() "$@" | grep -q offload } +wait_for_trap() +{ + "$@" | grep -q trap +} + until_counter_is() { local expr=$1; shift diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh new file mode 100755 index 000000000000..4898dd4118f1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -0,0 +1,400 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_test +" +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router1_create() +{ + vrf_create "vrf-r1" + ip link set dev $rp11 master vrf-r1 + ip link set dev $rp12 master vrf-r1 + ip link set dev $rp13 master vrf-r1 + + ip link set dev vrf-r1 up + ip link set dev $rp11 up + ip link set dev $rp12 up + ip link set dev $rp13 up + + ip address add 192.0.2.1/24 dev $rp11 + ip address add 2001:db8:1::1/64 dev $rp11 + + ip address add 169.254.2.12/24 dev $rp12 + ip address add fe80:2::12/64 dev $rp12 + + ip address add 169.254.3.13/24 dev $rp13 + ip address add fe80:3::13/64 dev $rp13 +} + +router1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-r1 + ip route del 198.51.100.0/24 vrf vrf-r1 + + ip address del fe80:3::13/64 dev $rp13 + ip address del 169.254.3.13/24 dev $rp13 + + ip address del fe80:2::12/64 dev $rp12 + ip address del 169.254.2.12/24 dev $rp12 + + ip address del 2001:db8:1::1/64 dev $rp11 + ip address del 192.0.2.1/24 dev $rp11 + + ip nexthop del id 103 + ip nexthop del id 101 + ip nexthop del id 102 + ip nexthop del id 106 + ip nexthop del id 104 + ip nexthop del id 105 + + ip link set dev $rp13 down + ip link set dev $rp12 down + ip link set dev $rp11 down + + vrf_destroy "vrf-r1" +} + +router2_create() +{ + vrf_create "vrf-r2" + ip link set dev $rp21 master vrf-r2 + ip link set dev $rp22 master vrf-r2 + ip link set dev $rp23 master vrf-r2 + + ip link set dev vrf-r2 up + ip link set dev $rp21 up + ip link set dev $rp22 up + ip link set dev $rp23 up + + ip address add 198.51.100.1/24 dev $rp21 + ip address add 2001:db8:2::1/64 dev $rp21 + + ip address add 169.254.2.22/24 dev $rp22 + ip address add fe80:2::22/64 dev $rp22 + + ip address add 169.254.3.23/24 dev $rp23 + ip address add fe80:3::23/64 dev $rp23 +} + +router2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-r2 + ip route del 192.0.2.0/24 vrf vrf-r2 + + ip address del fe80:3::23/64 dev $rp23 + ip address del 169.254.3.23/24 dev $rp23 + + ip address del fe80:2::22/64 dev $rp22 + ip address del 169.254.2.22/24 dev $rp22 + + ip address del 2001:db8:2::1/64 dev $rp21 + ip address del 198.51.100.1/24 dev $rp21 + + ip nexthop del id 201 + ip nexthop del id 202 + ip nexthop del id 204 + ip nexthop del id 205 + + ip link set dev $rp23 down + ip link set dev $rp22 down + ip link set dev $rp21 down + + vrf_destroy "vrf-r2" +} + +routing_nh_obj() +{ + ip nexthop add id 101 via 169.254.2.22 dev $rp12 + ip nexthop add id 102 via 169.254.3.23 dev $rp13 + ip nexthop add id 103 group 101/102 type resilient buckets 512 \ + idle_timer 0 + ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103 + + ip nexthop add id 104 via fe80:2::22 dev $rp12 + ip nexthop add id 105 via fe80:3::23 dev $rp13 + ip nexthop add id 106 group 104/105 type resilient buckets 512 \ + idle_timer 0 + ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106 + + ip nexthop add id 201 via 169.254.2.12 dev $rp22 + ip nexthop add id 202 via 169.254.3.13 dev $rp23 + ip nexthop add id 203 group 201/202 type resilient buckets 512 \ + idle_timer 0 + ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203 + + ip nexthop add id 204 via fe80:2::12 dev $rp22 + ip nexthop add id 205 via fe80:3::13 dev $rp23 + ip nexthop add id 206 group 204/205 type resilient buckets 512 \ + idle_timer 0 + ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206 +} + +multipath4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the provided weights. + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + # Restore settings. + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_l4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the provided weights. + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +multipath_test() +{ + # Without an idle timer, weight replacement should happen immediately. + log_info "Running multipath tests without an idle timer" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 0 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 0 + + log_info "Running IPv4 multipath tests" + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 2 1 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 2 1 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 106 group 104,1/105,1 type resilient + + # With an idle timer, weight replacement should not happen, so the + # expected ratio should always be the initial one (1:1). + log_info "Running multipath tests with an idle timer of 120 seconds" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 120 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 120 + + log_info "Running IPv4 multipath tests" + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 1 1 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 1 1 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 1 1 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 1 1 + + ip nexthop replace id 106 group 104,1/105,1 type resilient + + # With a short idle timer and enough idle time, weight replacement + # should happen. + log_info "Running multipath tests with an idle timer of 5 seconds" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 5 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 5 + + log_info "Running IPv4 multipath tests" + sleep 10 + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + sleep 10 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 2 1 + sleep 10 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + sleep 10 + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + sleep 10 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 2 1 + sleep 10 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 106 group 104,1/105,1 type resilient +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp13=${NETIFS[p5]} + rp23=${NETIFS[p6]} + + rp21=${NETIFS[p7]} + h2=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +ip nexthop ls >/dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "Nexthop objects not supported; skipping tests" + exit 0 +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +routing_nh_obj + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index ce6bea9675c0..0ccb1dda099a 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -658,7 +658,7 @@ test_ecn_decap() # In accordance with INET_ECN_decapsulate() __test_ecn_decap 00 00 0x00 __test_ecn_decap 01 01 0x01 - __test_ecn_decap 02 01 0x02 + __test_ecn_decap 02 01 0x01 __test_ecn_decap 01 03 0x03 __test_ecn_decap 02 03 0x03 test_ecn_decap_error diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 964db9ed544f..fe990d8696a9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -11,6 +11,7 @@ ksft_skip=4 timeout=30 mptcp_connect="" capture=0 +do_all_tests=1 TEST_COUNT=0 @@ -121,12 +122,6 @@ reset_with_add_addr_timeout() -j DROP } -for arg in "$@"; do - if [ "$arg" = "-c" ]; then - capture=1 - fi -done - ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@ -284,14 +279,19 @@ do_transfer() let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns1 ] - do - ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi else sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl flush @@ -318,14 +318,19 @@ do_transfer() let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns2 ] - do - ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi else sleep 1 ip netns exec ${connector_ns} ./pm_nl_ctl flush @@ -610,11 +615,22 @@ chk_rm_nr() { local rm_addr_nr=$1 local rm_subflow_nr=$2 + local invert=${3:-""} local count local dump_stats + local addr_ns + local subflow_ns + + if [ -z $invert ]; then + addr_ns=$ns1 + subflow_ns=$ns2 + elif [ $invert = "invert" ]; then + addr_ns=$ns2 + subflow_ns=$ns1 + fi printf "%-39s %s" " " "rm " - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" @@ -625,7 +641,7 @@ chk_rm_nr() fi echo -n " - sf " - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_subflow_nr" ]; then echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" @@ -833,7 +849,7 @@ remove_tests() run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert # subflow and signal, remove reset @@ -869,6 +885,29 @@ remove_tests() chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 + + # subflows flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows" 3 3 3 + chk_rm_nr 3 3 + + # addresses flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush addresses" 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert } add_tests() @@ -945,7 +984,7 @@ ipv6_tests() run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow chk_join_nr "remove single address IPv6" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert # subflow and signal IPv6, remove reset @@ -1088,7 +1127,7 @@ add_addr_ports_tests() run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address with port" 1 1 1 chk_add_nr 1 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert # subflow and signal with port, remove reset @@ -1221,7 +1260,8 @@ usage() echo " -4 v4mapped_tests" echo " -b backup_tests" echo " -p add_addr_ports_tests" - echo " -c syncookies_tests" + echo " -k syncookies_tests" + echo " -c capture pcap files" echo " -h help" } @@ -1235,12 +1275,24 @@ make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT -if [ -z $1 ]; then +for arg in "$@"; do + # check for "capture" arg before launching tests + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then + capture=1 + fi + + # exception for the capture option, the rest means: a part of the tests + if [ "${arg}" != "-c" ]; then + do_all_tests=0 + fi +done + +if [ $do_all_tests -eq 1 ]; then all_tests exit $ret fi -while getopts 'fsltra64bpch' opt; do +while getopts 'fsltra64bpkch' opt; do case $opt in f) subflows_tests @@ -1272,9 +1324,11 @@ while getopts 'fsltra64bpch' opt; do p) add_addr_ports_tests ;; - c) + k) syncookies_tests ;; + c) + ;; h | *) usage ;; diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 7b01b7c2ec10..066efd30e294 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -30,25 +30,25 @@ struct reuse_opts { }; struct reuse_opts unreusable_opts[12] = { - {0, 0, 0, 0}, - {0, 0, 0, 1}, - {0, 0, 1, 0}, - {0, 0, 1, 1}, - {0, 1, 0, 0}, - {0, 1, 0, 1}, - {0, 1, 1, 0}, - {0, 1, 1, 1}, - {1, 0, 0, 0}, - {1, 0, 0, 1}, - {1, 0, 1, 0}, - {1, 0, 1, 1}, + {{0, 0}, {0, 0}}, + {{0, 0}, {0, 1}}, + {{0, 0}, {1, 0}}, + {{0, 0}, {1, 1}}, + {{0, 1}, {0, 0}}, + {{0, 1}, {0, 1}}, + {{0, 1}, {1, 0}}, + {{0, 1}, {1, 1}}, + {{1, 0}, {0, 0}}, + {{1, 0}, {0, 1}}, + {{1, 0}, {1, 0}}, + {{1, 0}, {1, 1}}, }; struct reuse_opts reusable_opts[4] = { - {1, 1, 0, 0}, - {1, 1, 0, 1}, - {1, 1, 1, 0}, - {1, 1, 1, 1}, + {{1, 1}, {0, 0}}, + {{1, 1}, {0, 1}}, + {{1, 1}, {1, 0}}, + {{1, 1}, {1, 1}}, }; int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 431296c0f91c..427d94816f2d 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -371,6 +371,88 @@ else ip netns exec nsr1 nft list ruleset fi +# Another test: +# Add bridge interface br0 to Router1, with NAT enabled. +ip -net nsr1 link add name br0 type bridge +ip -net nsr1 addr flush dev veth0 +ip -net nsr1 link set up dev veth0 +ip -net nsr1 link set veth0 master br0 +ip -net nsr1 addr add 10.0.1.1/24 dev br0 +ip -net nsr1 addr add dead:1::1/64 dev br0 +ip -net nsr1 link set up dev br0 + +ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null + +# br0 with NAT enabled. +ip netns exec nsr1 nft -f - <<EOF +flush table ip nat +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345 + } + + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oifname "veth1" counter masquerade + } +} +EOF + +if test_tcp_forwarding_nat ns1 ns2; then + echo "PASS: flow offloaded for ns1/ns2 with bridge NAT" +else + echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 + ip netns exec nsr1 nft list ruleset + ret=1 +fi + +# Another test: +# Add bridge interface br0 to Router1, with NAT and VLAN. +ip -net nsr1 link set veth0 nomaster +ip -net nsr1 link set down dev veth0 +ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10 +ip -net nsr1 link set up dev veth0 +ip -net nsr1 link set up dev veth0.10 +ip -net nsr1 link set veth0.10 master br0 + +ip -net ns1 addr flush dev eth0 +ip -net ns1 link add link eth0 name eth0.10 type vlan id 10 +ip -net ns1 link set eth0 up +ip -net ns1 link set eth0.10 up +ip -net ns1 addr add 10.0.1.99/24 dev eth0.10 +ip -net ns1 route add default via 10.0.1.1 +ip -net ns1 addr add dead:1::99/64 dev eth0.10 + +if test_tcp_forwarding_nat ns1 ns2; then + echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN" +else + echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 + ip netns exec nsr1 nft list ruleset + ret=1 +fi + +# restore test topology (remove bridge and VLAN) +ip -net nsr1 link set veth0 nomaster +ip -net nsr1 link set veth0 down +ip -net nsr1 link set veth0.10 down +ip -net nsr1 link delete veth0.10 type vlan +ip -net nsr1 link delete br0 type bridge +ip -net ns1 addr flush dev eth0.10 +ip -net ns1 link set eth0.10 down +ip -net ns1 link set eth0 down +ip -net ns1 link delete eth0.10 type vlan + +# restore address in ns1 and nsr1 +ip -net ns1 link set eth0 up +ip -net ns1 addr add 10.0.1.99/24 dev eth0 +ip -net ns1 route add default via 10.0.1.1 +ip -net ns1 addr add dead:1::99/64 dev eth0 +ip -net ns1 route add default via dead:1::1 +ip -net nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net nsr1 addr add dead:1::1/64 dev veth0 +ip -net nsr1 link set up dev veth0 + KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) SPI1=$RANDOM diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d42115e4284d..8b0cd421ebd3 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -101,7 +101,7 @@ endef ifeq ($(CAN_BUILD_I386),1) $(BINARIES_32): CFLAGS += -m32 $(BINARIES_32): LDLIBS += -lrt -ldl -lm -$(BINARIES_32): %_32: %.c +$(BINARIES_32): $(OUTPUT)/%_32: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) endif @@ -109,7 +109,7 @@ endif ifeq ($(CAN_BUILD_X86_64),1) $(BINARIES_64): CFLAGS += -m64 $(BINARIES_64): LDLIBS += -lrt -ldl -$(BINARIES_64): %_64: %.c +$(BINARIES_64): $(OUTPUT)/%_64: %.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) endif |